qlogicagent 2.7.0 → 2.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (115) hide show
  1. package/dist/agent.js +18 -18
  2. package/dist/cli.js +353 -413
  3. package/dist/index.js +373 -433
  4. package/dist/orchestration.js +6 -6
  5. package/dist/protocol.js +1 -1
  6. package/dist/types/agent/agent.d.ts +1 -1
  7. package/dist/types/agent/tool-loop.d.ts +1 -1
  8. package/dist/types/agent/tunable-defaults.d.ts +4 -0
  9. package/dist/types/agent/types.d.ts +7 -18
  10. package/dist/types/cli/handlers/agents-handler.d.ts +19 -0
  11. package/dist/types/cli/handlers/config-handler.d.ts +17 -0
  12. package/dist/types/cli/handlers/dream-handler.d.ts +5 -0
  13. package/dist/types/cli/handlers/files-handler.d.ts +15 -0
  14. package/dist/types/cli/handlers/media-handler.d.ts +13 -0
  15. package/dist/types/cli/handlers/memory-handler.d.ts +11 -0
  16. package/dist/types/cli/handlers/pet-handler.d.ts +21 -0
  17. package/dist/types/cli/handlers/product-handler.d.ts +32 -0
  18. package/dist/types/cli/handlers/project-handler.d.ts +19 -0
  19. package/dist/types/cli/handlers/session-handler.d.ts +38 -0
  20. package/dist/types/cli/handlers/settings-handler.d.ts +24 -0
  21. package/dist/types/cli/handlers/skills-handler.d.ts +20 -0
  22. package/dist/types/cli/handlers/solo-handler.d.ts +15 -0
  23. package/dist/types/cli/handlers/turn-handler.d.ts +17 -0
  24. package/dist/types/cli/main.d.ts +2 -2
  25. package/dist/types/cli/stdio-server.d.ts +64 -106
  26. package/dist/types/cli/tool-bootstrap.d.ts +4 -3
  27. package/dist/types/config/config.d.ts +1 -1
  28. package/dist/types/index.d.ts +2 -2
  29. package/dist/types/llm/index.d.ts +1 -31
  30. package/dist/types/orchestration/tool-cascade.d.ts +2 -2
  31. package/dist/types/protocol/methods.d.ts +153 -5
  32. package/dist/types/protocol/wire/agent-events.d.ts +2 -2
  33. package/dist/types/protocol/wire/agent-methods.d.ts +5 -3
  34. package/dist/types/protocol/wire/gateway-rpc.d.ts +77 -4
  35. package/dist/types/protocol/wire/index.d.ts +1 -1
  36. package/dist/types/protocol/wire/notification-payloads.d.ts +72 -1
  37. package/dist/types/runtime/execution/dream-agent.d.ts +1 -1
  38. package/dist/types/runtime/execution/forked-agent.d.ts +1 -1
  39. package/dist/types/runtime/hooks/context-compression.d.ts +1 -1
  40. package/dist/types/runtime/hooks/memory-hooks.d.ts +1 -1
  41. package/dist/types/runtime/infra/acp-types.d.ts +4 -0
  42. package/dist/types/runtime/infra/agent-paths.d.ts +22 -25
  43. package/dist/types/runtime/infra/agent-process.d.ts +1 -1
  44. package/dist/types/runtime/infra/builtin-providers.d.ts +36 -0
  45. package/dist/types/runtime/infra/checkpoint-backend.d.ts +1 -1
  46. package/dist/types/runtime/infra/index.d.ts +1 -2
  47. package/dist/types/runtime/infra/key-pool.d.ts +120 -0
  48. package/dist/types/runtime/infra/media-persistence.d.ts +26 -15
  49. package/dist/types/runtime/infra/model-registry.d.ts +187 -0
  50. package/dist/types/runtime/infra/project-instructions-store.d.ts +3 -3
  51. package/dist/types/runtime/infra/project-store.d.ts +3 -0
  52. package/dist/types/runtime/infra/token-budget.d.ts +2 -2
  53. package/dist/types/runtime/infra/worktree-backend.d.ts +1 -1
  54. package/dist/types/runtime/pet/index.d.ts +8 -0
  55. package/dist/types/runtime/pet/pet-context-injection.d.ts +8 -0
  56. package/dist/types/runtime/pet/pet-file-loader.d.ts +62 -0
  57. package/dist/types/runtime/pet/pet-growth-engine.d.ts +60 -0
  58. package/dist/types/runtime/pet/pet-reaction-service.d.ts +33 -0
  59. package/dist/types/runtime/pet/pet-soul-service.d.ts +71 -0
  60. package/dist/types/runtime/session/group-session-split.d.ts +38 -0
  61. package/dist/types/runtime/session/index.d.ts +4 -2
  62. package/dist/types/runtime/session/session-locator.d.ts +24 -0
  63. package/dist/types/runtime/session/session-memory.d.ts +4 -4
  64. package/dist/types/runtime/session/session-persistence.d.ts +47 -46
  65. package/dist/types/runtime/session/session-state.d.ts +3 -5
  66. package/dist/types/skills/memory/local-memory-provider.d.ts +1 -1
  67. package/dist/types/skills/memory/local-store.d.ts +3 -2
  68. package/dist/types/skills/memory/memdir.d.ts +7 -4
  69. package/dist/types/skills/memory/memory-provider-factory.d.ts +2 -8
  70. package/dist/types/skills/permissions/denial-audit-log.d.ts +1 -1
  71. package/dist/types/skills/permissions/permission-classifier.d.ts +1 -1
  72. package/dist/types/skills/tools/search-tool.d.ts +1 -1
  73. package/dist/types/skills/tools.d.ts +3 -3
  74. package/dist/types/transport/acp-event-emitter.d.ts +1 -1
  75. package/dist/types/transport/acp-server.d.ts +2 -2
  76. package/package.json +2 -1
  77. package/dist/types/llm/adapters/aliyun-oss-file-upload-adapter.d.ts +0 -44
  78. package/dist/types/llm/adapters/gemini-file-upload-adapter.d.ts +0 -26
  79. package/dist/types/llm/adapters/hub-oss-file-upload-adapter.d.ts +0 -29
  80. package/dist/types/llm/adapters/index.d.ts +0 -10
  81. package/dist/types/llm/adapters/openai-file-upload-adapter.d.ts +0 -38
  82. package/dist/types/llm/adapters/volcengine-file-upload-adapter.d.ts +0 -24
  83. package/dist/types/llm/builtin-providers.d.ts +0 -10
  84. package/dist/types/llm/debug-transport.d.ts +0 -12
  85. package/dist/types/llm/file-upload-service.d.ts +0 -68
  86. package/dist/types/llm/gemini-schema-utils.d.ts +0 -17
  87. package/dist/types/llm/llm-client.d.ts +0 -43
  88. package/dist/types/llm/media-client.d.ts +0 -42
  89. package/dist/types/llm/media-transport.d.ts +0 -176
  90. package/dist/types/llm/model-catalog.d.ts +0 -82
  91. package/dist/types/llm/model-detection.d.ts +0 -22
  92. package/dist/types/llm/provider-def.d.ts +0 -203
  93. package/dist/types/llm/provider-registry.d.ts +0 -59
  94. package/dist/types/llm/provider-tool-api.d.ts +0 -44
  95. package/dist/types/llm/retry.d.ts +0 -37
  96. package/dist/types/llm/transport.d.ts +0 -281
  97. package/dist/types/llm/transports/anthropic-messages.d.ts +0 -65
  98. package/dist/types/llm/transports/gemini-cache-api.d.ts +0 -86
  99. package/dist/types/llm/transports/gemini-file-api.d.ts +0 -90
  100. package/dist/types/llm/transports/gemini-generatecontent.d.ts +0 -56
  101. package/dist/types/llm/transports/gemini-lyria-realtime.d.ts +0 -117
  102. package/dist/types/llm/transports/gemini-media.d.ts +0 -53
  103. package/dist/types/llm/transports/media-resolve.d.ts +0 -50
  104. package/dist/types/llm/transports/minimax-media.d.ts +0 -55
  105. package/dist/types/llm/transports/openai-chat.d.ts +0 -81
  106. package/dist/types/llm/transports/openai-media.d.ts +0 -24
  107. package/dist/types/llm/transports/openai-responses.d.ts +0 -63
  108. package/dist/types/llm/transports/qwen-media.d.ts +0 -50
  109. package/dist/types/llm/transports/realtime-transport.d.ts +0 -183
  110. package/dist/types/llm/transports/volcengine-grounding.d.ts +0 -58
  111. package/dist/types/llm/transports/volcengine-media.d.ts +0 -93
  112. package/dist/types/llm/transports/volcengine-responses.d.ts +0 -64
  113. package/dist/types/llm/transports/zhipu-media.d.ts +0 -82
  114. package/dist/types/llm/transports/zhipu-tool-api.d.ts +0 -35
  115. package/dist/types/runtime/infra/project-plan-store.d.ts +0 -27
@@ -1,53 +0,0 @@
1
- /**
2
- * Gemini Media Transport — unified media generation for all Gemini media APIs.
3
- *
4
- * Supported media types and endpoints:
5
- * image — POST /models/{model}:generateContent (responseModalities: ["TEXT","IMAGE"])
6
- * video — POST /models/{model}:predictLongRunning → poll operations → download URI
7
- * music — POST /models/{model}:generateContent (Lyria 3 — inlineData audio)
8
- * music_realtime — WebSocket session (Lyria RealTime — streaming PCM → WAV)
9
- * tts — POST /models/{model}:generateContent (speechConfig — inlineData PCM)
10
- * embedding — POST /models/{model}:embedContent (float vector)
11
- *
12
- * Auth: x-goog-api-key header for all endpoints.
13
- */
14
- import type { AsyncMediaTransport, MediaRequest, MediaResult, MediaType } from "../media-transport.js";
15
- export interface GeminiMediaConfig {
16
- /** Base URL, e.g. "https://generativelanguage.googleapis.com/v1beta" */
17
- baseUrl: string;
18
- timeoutMs?: number;
19
- }
20
- export declare class GeminiMediaTransport implements AsyncMediaTransport {
21
- readonly supportedTypes: readonly MediaType[];
22
- private apiBase;
23
- private timeoutMs;
24
- constructor(config: GeminiMediaConfig);
25
- generate(request: MediaRequest, apiKey: string, signal?: AbortSignal): Promise<MediaResult>;
26
- deleteVideoTask(_taskId: string, _apiKey: string, _signal?: AbortSignal): Promise<void>;
27
- listVideoTasks(_apiKey: string, _options?: {
28
- after?: string;
29
- limit?: number;
30
- status?: string;
31
- }, _signal?: AbortSignal): Promise<Record<string, unknown>>;
32
- getTaskStatus(taskId: string, apiKey: string, signal?: AbortSignal): Promise<{
33
- status: string;
34
- task: Record<string, unknown>;
35
- }>;
36
- private generateImage;
37
- private generateVideo;
38
- private generateMusic;
39
- private generateMusicRealtime;
40
- private generateTTS;
41
- private generateEmbedding;
42
- private postJson;
43
- private pollOperation;
44
- /**
45
- * Resolve an image URL to inline data for the Veo API.
46
- * Supports file:// paths and https:// URLs.
47
- */
48
- private resolveImageData;
49
- /** Extract base64 image data from generateContent response → persist to cache files. */
50
- private extractInlineImages;
51
- /** Extract base64 audio data from generateContent response → persist to cache files. */
52
- private extractInlineAudio;
53
- }
@@ -1,50 +0,0 @@
1
- /**
2
- * Media URL Resolution — resolves local/private URLs for cloud LLM APIs.
3
- *
4
- * Cloud LLM APIs (OpenAI, Anthropic, DeepSeek, etc.) cannot fetch content
5
- * from localhost or private networks.
6
- *
7
- * **Primary approach (upload-based):**
8
- * Local URLs are fetched → uploaded to provider's File API or Aliyun OSS → public URL returned.
9
- * This is the ONLY approach for images, video, and documents.
10
- *
11
- * **Audio exception (base64):**
12
- * OpenAI's input_audio.data API field mandates base64 encoding.
13
- * resolveMediaUrl() is retained ONLY for this case.
14
- *
15
- * URL-first design: the gateway stores media as HTTP URLs; this layer handles
16
- * the last-mile transformation before sending to provider APIs.
17
- */
18
- import type { FileUploadAdapter } from "../file-upload-service.js";
19
- /** Check if a URL points to a local/private address that cloud APIs cannot reach. */
20
- export declare function isLocalUrl(url: string): boolean;
21
- export interface MediaResolveContext {
22
- /** Provider-specific upload adapter. */
23
- uploadAdapter: FileUploadAdapter;
24
- /** API key for the upload. */
25
- apiKey: string;
26
- /** Abort signal. */
27
- signal?: AbortSignal;
28
- }
29
- /**
30
- * Resolve a local URL by uploading to the provider's File API.
31
- * This is the **preferred** method for all media types.
32
- *
33
- * Public URLs pass through unchanged.
34
- * Local URLs are uploaded → public URL or file_id returned.
35
- */
36
- export declare function resolveMediaUrlViaUpload(url: string, ctx: MediaResolveContext): Promise<string>;
37
- /**
38
- * Batch-resolve URLs via upload. Best-effort: failures return original URL.
39
- */
40
- export declare function resolveMediaUrlsViaUpload(urls: string[], ctx: MediaResolveContext): Promise<string[]>;
41
- /**
42
- * Resolve local audio URL to base64 data URL.
43
- *
44
- * ONLY for audio — OpenAI's input_audio.data API field mandates base64 encoding.
45
- * There is no upload alternative for audio in the OpenAI API.
46
- *
47
- * For images/video/documents: ALWAYS use resolveMediaUrlViaUpload() instead.
48
- * base64 is forbidden for non-audio media per project architecture rules.
49
- */
50
- export declare function resolveMediaUrl(url: string, fallbackMime?: string): Promise<string>;
@@ -1,55 +0,0 @@
1
- /**
2
- * MiniMax Media Transport — Music + Video Generation.
3
- *
4
- * Music: POST /v1/music_generation (sync or async poll)
5
- * Video: POST /v1/video_generation (4 modes: text, image, first-last-frame, subject-ref)
6
- * Video Query: GET /v1/query/video_generation?task_id=XXX
7
- * File Retrieve: GET /v1/files/retrieve?file_id=XXX (get download_url)
8
- *
9
- * Auth: Authorization: Bearer $MINIMAX_API_KEY
10
- * Docs: minimax-ProviderMax.md §13-18 (video), §21 (music), §24-28 (files)
11
- */
12
- import type { AsyncMediaTransport, MediaRequest, MediaResult, MediaType } from "../media-transport.js";
13
- export interface MiniMaxMediaConfig {
14
- /** Base URL, e.g. "https://api.minimaxi.com" */
15
- baseUrl: string;
16
- timeoutMs?: number;
17
- }
18
- export declare class MiniMaxMediaTransport implements AsyncMediaTransport {
19
- readonly supportedTypes: readonly MediaType[];
20
- private baseUrl;
21
- private timeoutMs;
22
- constructor(config: MiniMaxMediaConfig);
23
- generate(request: MediaRequest, apiKey: string, signal?: AbortSignal): Promise<MediaResult>;
24
- private generateMusic;
25
- private pollTask;
26
- /**
27
- * Generate lyrics from a text prompt via MiniMax Lyrics Generation API.
28
- * POST /v1/lyrics_generation — returns structured lyrics with tags.
29
- */
30
- generateLyrics(prompt: string, apiKey: string, signal?: AbortSignal): Promise<string>;
31
- private generateVideo;
32
- private pollVideoTask;
33
- private getFileDownloadUrl;
34
- /**
35
- * Query a single video task by ID.
36
- * GET /v1/query/video_generation?task_id=XXX
37
- */
38
- getTaskStatus(taskId: string, apiKey: string, signal?: AbortSignal): Promise<{
39
- status: string;
40
- task: Record<string, unknown>;
41
- }>;
42
- /**
43
- * List tasks — MiniMax does not have a bulk list endpoint.
44
- * Each task must be queried individually with getTaskStatus().
45
- */
46
- listVideoTasks(_apiKey: string, _options?: {
47
- after?: string;
48
- limit?: number;
49
- status?: string;
50
- }, _signal?: AbortSignal): Promise<Record<string, unknown>>;
51
- /**
52
- * Cancel/delete is not natively supported by MiniMax video API.
53
- */
54
- deleteVideoTask(_taskId: string, _apiKey: string, _signal?: AbortSignal): Promise<void>;
55
- }
@@ -1,81 +0,0 @@
1
- /**
2
- * OpenAI Chat Completions Transport — SSE streaming implementation.
3
- *
4
- * Covers all OpenAI-compatible providers:
5
- * DeepSeek, Qwen, Minimax, Moonshot, OpenRouter, etc.
6
- *
7
- * POST {baseUrl}/v1/chat/completions with stream: true
8
- * Auth: Authorization: Bearer {apiKey}
9
- *
10
- * SSE format: lines prefixed with "data: ", JSON parsing per event.
11
- *
12
- * Adapted from admin-infer-proxy-client.ts SSE logic + Hermes openai_chat.py transport.
13
- */
14
- import type { LLMChunk, LLMRequest, LLMTransport, FIMRequest, FIMChunk } from "../transport.js";
15
- import type { ProviderQuirks } from "../provider-def.js";
16
- import type { FileUploadAdapter } from "../file-upload-service.js";
17
- export interface OpenAIChatTransportConfig {
18
- baseUrl: string;
19
- /** Additional headers (e.g. for specific providers) */
20
- extraHeaders?: Record<string, string>;
21
- /** Timeout in ms (default 180_000) */
22
- timeoutMs?: number;
23
- /** Whether to include stream_options (default true). Set false for providers that reject it. */
24
- supportsStreamOptions?: boolean;
25
- /** Whether to omit temperature when it equals 0 (e.g. Moonshot rejects 0) */
26
- omitZeroTemperature?: boolean;
27
- /** Provider-specific quirks (CC/altcode parity) */
28
- quirks?: ProviderQuirks;
29
- /** File upload adapter for resolving local media URLs via upload instead of base64. */
30
- fileUploadAdapter?: FileUploadAdapter;
31
- }
32
- export declare class OpenAIChatTransport implements LLMTransport {
33
- private baseUrl;
34
- private extraHeaders;
35
- private timeoutMs;
36
- private supportsStreamOptions;
37
- private omitZeroTemperature;
38
- private quirks;
39
- private fileUploadAdapter?;
40
- private cumulativeReasoningLen;
41
- private cumulativeContentLen;
42
- constructor(config: OpenAIChatTransportConfig);
43
- stream(request: LLMRequest, apiKey: string, signal?: AbortSignal): AsyncGenerator<LLMChunk>;
44
- private fetchAndStream;
45
- /**
46
- * Handle non-streaming JSON response from providers that ignore stream:true.
47
- * Synthesize the same LLMChunk events a streaming response would produce.
48
- */
49
- private handleNonStreamingResponse;
50
- /**
51
- * Parse SSE stream with 90s idle watchdog (CC parity).
52
- * If no data arrives within STREAM_IDLE_TIMEOUT_MS, throw to trigger retry.
53
- */
54
- private parseSSEStreamWithWatchdog;
55
- private processChunk;
56
- /**
57
- * FIM completion via /beta/v1/completions.
58
- * Only works with DeepSeek (requires supportsPrefixCompletion quirk).
59
- * Non-thinking mode only; max completion 4K tokens.
60
- */
61
- complete(request: FIMRequest, apiKey: string, signal?: AbortSignal): AsyncGenerator<FIMChunk>;
62
- /**
63
- * Upload a file for use in conversations (Kimi File API).
64
- * Returns a file_id that can be referenced in user messages.
65
- * POST /v1/files with multipart/form-data.
66
- */
67
- uploadFile(fileBlob: Blob, filename: string, purpose: string, apiKey: string, signal?: AbortSignal): Promise<{
68
- fileId: string;
69
- filename: string;
70
- bytes: number;
71
- }>;
72
- /**
73
- * Get file content/status — GET /v1/files/{file_id}
74
- */
75
- getFileInfo(fileId: string, apiKey: string, signal?: AbortSignal): Promise<{
76
- id: string;
77
- filename: string;
78
- bytes: number;
79
- status: string;
80
- }>;
81
- }
@@ -1,24 +0,0 @@
1
- /**
2
- * OpenAI Media Transport — Images API (gpt-image-2) + Audio Speech API (tts-1).
3
- *
4
- * Image: POST /v1/images/generations (sync response, returns URLs)
5
- * TTS: POST /v1/audio/speech (sync response, returns raw audio bytes)
6
- * Auth: Authorization: Bearer $OPENAI_API_KEY
7
- * Docs: https://platform.openai.com/docs/api-reference/images/create
8
- * https://platform.openai.com/docs/api-reference/audio/createSpeech
9
- */
10
- import type { MediaTransport, MediaRequest, MediaResult, MediaType } from "../media-transport.js";
11
- export interface OpenAIMediaConfig {
12
- /** Base URL, e.g. "https://api.openai.com" */
13
- baseUrl: string;
14
- timeoutMs?: number;
15
- }
16
- export declare class OpenAIMediaTransport implements MediaTransport {
17
- readonly supportedTypes: readonly MediaType[];
18
- private baseUrl;
19
- private timeoutMs;
20
- constructor(config: OpenAIMediaConfig);
21
- generate(request: MediaRequest, apiKey: string, signal?: AbortSignal): Promise<MediaResult>;
22
- private generateImage;
23
- private generateTTS;
24
- }
@@ -1,63 +0,0 @@
1
- /**
2
- * OpenAI Responses API Transport — SSE streaming implementation.
3
- *
4
- * Implements the OpenAI Responses API (`POST /v1/responses`),
5
- * the officially recommended path for GPT-5.x text generation.
6
- *
7
- * Key differences from OpenAI Chat Completions:
8
- * - Endpoint: POST {baseUrl}/v1/responses
9
- * - Request body uses `input` (not `messages`), `instructions`, `reasoning`
10
- * - SSE events: response.output_text.delta, response.function_call_arguments.delta,
11
- * response.completed, etc.
12
- * - Tool defs: { type: "function", name, parameters } (not nested under `function:`)
13
- * - Tool results: { type: "function_call_output", call_id, output }
14
- * - Context persistence: previous_response_id for server-side session continuation
15
- * - Structured output: `text: { format: { type: "json_schema", ... } }`
16
- * - Reasoning: `reasoning: { effort, summary }` for GPT-5.x models
17
- *
18
- * Wire format reference:
19
- * https://developers.openai.com/api/docs/api-reference/responses/create
20
- * https://developers.openai.com/api/docs/api-reference/responses/streaming-events
21
- *
22
- * Design: Closely mirrors volcengine-responses.ts patterns while adapting to
23
- * OpenAI-specific wire format. Shared LLMChunk output makes upper layers
24
- * transport-agnostic.
25
- */
26
- import type { LLMChunk, LLMRequest, LLMTransport } from "../transport.js";
27
- import type { ProviderQuirks } from "../provider-def.js";
28
- import type { FileUploadAdapter } from "../file-upload-service.js";
29
- export interface OpenAIResponsesTransportConfig {
30
- baseUrl: string;
31
- extraHeaders?: Record<string, string>;
32
- timeoutMs?: number;
33
- quirks?: ProviderQuirks;
34
- fileUploadAdapter?: FileUploadAdapter;
35
- }
36
- export declare class OpenAIResponsesTransport implements LLMTransport {
37
- private baseUrl;
38
- private extraHeaders;
39
- private timeoutMs;
40
- private quirks;
41
- private fileUploadAdapter?;
42
- constructor(config: OpenAIResponsesTransportConfig);
43
- stream(request: LLMRequest, apiKey: string, signal?: AbortSignal): AsyncGenerator<LLMChunk>;
44
- private buildRequestBody;
45
- private fetchAndStream;
46
- private handleNonStreamingResponse;
47
- /**
48
- * Parse OpenAI Responses API SSE stream.
49
- *
50
- * Event format: "event: <type>\ndata: <json>\n\n"
51
- * Key events:
52
- * - response.output_text.delta → text content delta
53
- * - response.reasoning_summary_text.delta → reasoning summary text
54
- * - response.function_call_arguments.delta → tool call arguments streaming
55
- * - response.output_item.added → new output item started
56
- * - response.output_item.done → output item completed
57
- * - response.content_part.done → content part completed (annotations)
58
- * - response.completed → full response complete with usage
59
- * - response.failed → error
60
- */
61
- private parseSSEStream;
62
- private processEvent;
63
- }
@@ -1,50 +0,0 @@
1
- /**
2
- * Qwen (DashScope) Media Transport — TTS + Video Generation.
3
- *
4
- * DashScope async task API pattern:
5
- * Submit: POST /api/v1/services/aigc/<service>/generation (X-DashScope-Async: enable)
6
- * Poll: GET /api/v1/tasks/{taskId}
7
- *
8
- * Auth: Authorization: Bearer $DASHSCOPE_API_KEY
9
- * Docs: https://help.aliyun.com/zh/model-studio/developer-reference/
10
- *
11
- * Video models: wan2.7-t2v (text-to-video), wan2.7-i2v (image-to-video)
12
- * TTS models: qwen3-tts-voicedesign, cosyvoice-v2
13
- */
14
- import type { AsyncMediaTransport, MediaRequest, MediaResult, MediaType } from "../media-transport.js";
15
- export interface QwenMediaConfig {
16
- /** Base URL, e.g. "https://dashscope.aliyuncs.com" */
17
- baseUrl: string;
18
- timeoutMs?: number;
19
- }
20
- export declare class QwenMediaTransport implements AsyncMediaTransport {
21
- readonly supportedTypes: readonly MediaType[];
22
- private baseUrl;
23
- private timeoutMs;
24
- constructor(config: QwenMediaConfig);
25
- generate(request: MediaRequest, apiKey: string, signal?: AbortSignal): Promise<MediaResult>;
26
- private pollTask;
27
- private generateVideo;
28
- /**
29
- * Query a single task by ID using DashScope unified task endpoint.
30
- * GET /api/v1/tasks/{taskId}
31
- */
32
- getTaskStatus(taskId: string, apiKey: string, signal?: AbortSignal): Promise<{
33
- status: string;
34
- task: Record<string, unknown>;
35
- }>;
36
- /**
37
- * List tasks — DashScope has a task list API.
38
- * GET /api/v1/tasks?page_no=1&page_size=20&status=RUNNING
39
- */
40
- listVideoTasks(apiKey: string, options?: {
41
- after?: string;
42
- limit?: number;
43
- status?: string;
44
- }, signal?: AbortSignal): Promise<Record<string, unknown>>;
45
- /**
46
- * Cancel/delete — DashScope does not have a public task cancellation API.
47
- */
48
- deleteVideoTask(_taskId: string, _apiKey: string, _signal?: AbortSignal): Promise<void>;
49
- private extractMediaUrl;
50
- }
@@ -1,183 +0,0 @@
1
- /**
2
- * Realtime WebSocket Transport — bidirectional audio/voice streaming
3
- * via the OpenAI Realtime API protocol (also compatible with GLM Realtime).
4
- *
5
- * ## Protocol: WebSocket JSON events
6
- *
7
- * Client → Server:
8
- * - session.update: configure session (model, voice, tools, etc.)
9
- * - input_audio_buffer.append: send audio chunks (base64 PCM16)
10
- * - input_audio_buffer.commit: signal end of audio input
11
- * - conversation.item.create: inject text/function_result items
12
- * - response.create: request a model response
13
- * - response.cancel: abort in-progress response
14
- *
15
- * Server → Client:
16
- * - session.created: session initialized
17
- * - session.updated: config acknowledged
18
- * - input_audio_buffer.speech_started: VAD detected speech
19
- * - input_audio_buffer.speech_stopped: VAD detected silence
20
- * - response.created: response generation started
21
- * - response.output_item.added: new output item (text/audio/function_call)
22
- * - response.audio.delta: audio chunk (base64 PCM16)
23
- * - response.audio_transcript.delta: transcript of generated speech
24
- * - response.text.delta: text generation delta
25
- * - response.function_call_arguments.delta: tool call args delta
26
- * - response.function_call_arguments.done: tool call complete
27
- * - response.output_item.done: output item finished
28
- * - response.done: full response complete
29
- * - error: server error
30
- *
31
- * ## Architecture
32
- *
33
- * RealtimeTransport manages a single persistent WebSocket connection per session.
34
- * It exposes an event-driven API (AsyncGenerator) that the agent tool-loop
35
- * can consume for voice-enabled interactions.
36
- *
37
- * Docs:
38
- * - OpenAI: https://platform.openai.com/docs/api-reference/realtime
39
- * - GLM: https://docs.bigmodel.cn/cn/guide/develop/realtime-api
40
- */
41
- export interface RealtimeConfig {
42
- /** WebSocket endpoint (e.g. "wss://api.openai.com/v1/realtime") */
43
- baseUrl: string;
44
- /** Model to use (e.g. "gpt-realtime-2", "glm-realtime") */
45
- model: string;
46
- /** API key */
47
- apiKey: string;
48
- /** Voice for TTS output */
49
- voice?: string;
50
- /** Input modalities: "text", "audio", or both */
51
- inputModalities?: Array<"text" | "audio">;
52
- /** Output modalities: "text", "audio", or both */
53
- outputModalities?: Array<"text" | "audio">;
54
- /** Temperature for generation */
55
- temperature?: number;
56
- /** Tool definitions for function calling */
57
- tools?: RealtimeTool[];
58
- /** Voice Activity Detection mode */
59
- vadMode?: "server_vad" | "none";
60
- /** VAD threshold (0.0-1.0) */
61
- vadThreshold?: number;
62
- /** Auth type: "header" (OpenAI) or "query" (GLM) */
63
- authMode?: "header" | "query";
64
- }
65
- export interface RealtimeTool {
66
- type: "function";
67
- name: string;
68
- description: string;
69
- parameters: Record<string, unknown>;
70
- }
71
- export type RealtimeEvent = {
72
- type: "session_created";
73
- sessionId: string;
74
- } | {
75
- type: "speech_started";
76
- } | {
77
- type: "speech_stopped";
78
- audioEndMs: number;
79
- } | {
80
- type: "audio_delta";
81
- delta: string;
82
- } | {
83
- type: "audio_transcript_delta";
84
- delta: string;
85
- } | {
86
- type: "text_delta";
87
- delta: string;
88
- } | {
89
- type: "function_call_start";
90
- callId: string;
91
- name: string;
92
- } | {
93
- type: "function_call_delta";
94
- callId: string;
95
- delta: string;
96
- } | {
97
- type: "function_call_done";
98
- callId: string;
99
- name: string;
100
- arguments: string;
101
- } | {
102
- type: "response_done";
103
- usage?: RealtimeUsage;
104
- } | {
105
- type: "error";
106
- code: string;
107
- message: string;
108
- } | {
109
- type: "closed";
110
- code: number;
111
- reason: string;
112
- };
113
- export interface RealtimeUsage {
114
- inputTokens: number;
115
- outputTokens: number;
116
- inputAudioTokens?: number;
117
- outputAudioTokens?: number;
118
- }
119
- /**
120
- * Manages a persistent WebSocket connection for real-time audio/voice
121
- * interactions with an LLM provider.
122
- *
123
- * Usage:
124
- * ```ts
125
- * const rt = new RealtimeTransport(config);
126
- * rt.connect();
127
- *
128
- * // Send audio
129
- * rt.appendAudio(base64Chunk);
130
- * rt.commitAudio();
131
- *
132
- * // Or send text
133
- * rt.sendText("Hello!");
134
- *
135
- * // Submit function results
136
- * rt.sendFunctionResult(callId, result);
137
- *
138
- * // Consume events
139
- * for await (const event of rt.events()) {
140
- * switch (event.type) {
141
- * case "audio_delta": playAudio(event.delta); break;
142
- * case "function_call_done": handleToolCall(event); break;
143
- * }
144
- * }
145
- *
146
- * rt.close();
147
- * ```
148
- */
149
- export declare class RealtimeTransport {
150
- private ws;
151
- private config;
152
- private eventQueue;
153
- private waiters;
154
- private closed;
155
- constructor(config: RealtimeConfig);
156
- /** Open WebSocket connection and configure session. */
157
- connect(): Promise<void>;
158
- /** Send audio data (base64 PCM16). */
159
- appendAudio(base64Chunk: string): void;
160
- /** Mark end of audio input and trigger response. */
161
- commitAudio(): void;
162
- /** Send a text message. */
163
- sendText(text: string): void;
164
- /** Submit a function call result back to the model. */
165
- sendFunctionResult(callId: string, output: string): void;
166
- /** Trigger a model response (e.g. after sending text). */
167
- requestResponse(): void;
168
- /** Cancel an in-progress response. */
169
- cancelResponse(): void;
170
- /** Async iterator of server events. */
171
- events(): AsyncGenerator<RealtimeEvent>;
172
- /** Close the WebSocket connection. */
173
- close(): void;
174
- private buildUrl;
175
- private sendSessionUpdate;
176
- private send;
177
- private push;
178
- private drainWaiters;
179
- /**
180
- * Parse a server-sent JSON event into our typed event(s).
181
- */
182
- private parseServerEvent;
183
- }
@@ -1,58 +0,0 @@
1
- /**
2
- * Volcengine Grounding — spatial coordinate parser (volcengine-ProviderMax §14).
3
- *
4
- * Parses model-emitted spatial reference tags from text output:
5
- * - <bbox>x_min y_min x_max y_max</bbox> → bounding box
6
- * - <point>x y</point> → single point
7
- * - <polygon>x1 y1 x2 y2 ...</polygon> → polygon vertices
8
- *
9
- * All coordinates are in normalized 1000×1000 space, range [0, 999].
10
- * Use `toPixelCoords()` to convert to actual image pixel coordinates.
11
- */
12
- export type SpatialReference = {
13
- type: "bbox";
14
- x1: number;
15
- y1: number;
16
- x2: number;
17
- y2: number;
18
- space: "normalized_1000";
19
- } | {
20
- type: "point";
21
- x: number;
22
- y: number;
23
- space: "normalized_1000";
24
- } | {
25
- type: "polygon";
26
- points: Array<{
27
- x: number;
28
- y: number;
29
- }>;
30
- space: "normalized_1000";
31
- };
32
- export interface PixelBbox {
33
- x1: number;
34
- y1: number;
35
- x2: number;
36
- y2: number;
37
- }
38
- export interface PixelPoint {
39
- x: number;
40
- y: number;
41
- }
42
- /**
43
- * Extract all spatial references from model output text.
44
- * Returns an empty array if no grounding tags are found.
45
- */
46
- export declare function parseGroundingTags(text: string): SpatialReference[];
47
- /**
48
- * Convert a normalized 1000×1000 bounding box to pixel coordinates.
49
- */
50
- export declare function bboxToPixels(ref: Extract<SpatialReference, {
51
- type: "bbox";
52
- }>, width: number, height: number): PixelBbox;
53
- /**
54
- * Convert a normalized 1000×1000 point to pixel coordinates.
55
- */
56
- export declare function pointToPixels(ref: Extract<SpatialReference, {
57
- type: "point";
58
- }>, width: number, height: number): PixelPoint;