qlogicagent 0.5.3 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104)
  1. package/README.md +24 -23
  2. package/dist/agent.js +13 -13
  3. package/dist/cli.js +218 -184
  4. package/dist/contracts.js +1 -1
  5. package/dist/index.js +391 -20
  6. package/dist/orchestration.js +14 -105
  7. package/dist/types/agent/agent.d.ts +1 -1
  8. package/dist/types/agent/constants.d.ts +2 -2
  9. package/dist/types/agent/tool-access.d.ts +30 -0
  10. package/dist/types/agent/tool-loop.d.ts +2 -4
  11. package/dist/types/agent/types.d.ts +51 -13
  12. package/dist/types/cli/main.d.ts +3 -3
  13. package/dist/types/cli/stdio-server.d.ts +89 -7
  14. package/dist/types/cli/tool-bootstrap.d.ts +16 -5
  15. package/dist/types/cli/transport.d.ts +40 -0
  16. package/dist/types/contracts/index.d.ts +0 -1
  17. package/dist/types/contracts/todo.d.ts +9 -0
  18. package/dist/types/index.d.ts +3 -0
  19. package/dist/types/llm/index.d.ts +7 -1
  20. package/dist/types/llm/media-client.d.ts +43 -0
  21. package/dist/types/llm/media-transport.d.ts +80 -0
  22. package/dist/types/llm/model-catalog.d.ts +5 -5
  23. package/dist/types/llm/provider-def.d.ts +7 -0
  24. package/dist/types/llm/provider-registry.d.ts +1 -1
  25. package/dist/types/llm/transport.d.ts +2 -0
  26. package/dist/types/llm/transports/anthropic-messages.d.ts +34 -11
  27. package/dist/types/llm/transports/gemini-media.d.ts +21 -0
  28. package/dist/types/llm/transports/minimax-media.d.ts +21 -0
  29. package/dist/types/llm/transports/openai-chat.d.ts +1 -1
  30. package/dist/types/llm/transports/openai-media.d.ts +24 -0
  31. package/dist/types/llm/transports/qwen-media.d.ts +25 -0
  32. package/dist/types/llm/transports/volcengine-media.d.ts +34 -0
  33. package/dist/types/orchestration/index.d.ts +14 -112
  34. package/dist/types/orchestration/skill-improvement.d.ts +2 -2
  35. package/dist/types/orchestration/{fork-subagent.d.ts → subagent/fork-subagent.d.ts} +2 -4
  36. package/dist/types/orchestration/{tool-schema.d.ts → tool-loop/tool-schema.d.ts} +1 -2
  37. package/dist/types/protocol/index.d.ts +7 -0
  38. package/dist/types/protocol/methods.d.ts +380 -0
  39. package/dist/types/protocol/notifications.d.ts +296 -0
  40. package/dist/types/runtime/execution/dream-agent.d.ts +1 -1
  41. package/dist/types/runtime/execution/forked-agent.d.ts +1 -3
  42. package/dist/types/runtime/hooks/context-compression.d.ts +1 -1
  43. package/dist/types/runtime/infra/agent-paths.d.ts +57 -0
  44. package/dist/types/runtime/infra/checkpoint-backend.d.ts +8 -0
  45. package/dist/types/runtime/infra/disk-storage.d.ts +36 -0
  46. package/dist/types/runtime/infra/file-watcher.d.ts +2 -2
  47. package/dist/types/runtime/infra/index.d.ts +2 -0
  48. package/dist/types/runtime/infra/secure-storage.d.ts +1 -1
  49. package/dist/types/runtime/infra/task-runtime.d.ts +1 -1
  50. package/dist/types/runtime/prompt/environment-context.d.ts +1 -1
  51. package/dist/types/runtime/prompt/instruction-loader.d.ts +6 -6
  52. package/dist/types/runtime/session/index.d.ts +1 -1
  53. package/dist/types/runtime/session/session-memory.d.ts +0 -1
  54. package/dist/types/runtime/session/session-persistence.d.ts +1 -1
  55. package/dist/types/runtime/session/session-state.d.ts +18 -9
  56. package/dist/types/skills/index.d.ts +20 -26
  57. package/dist/types/skills/mcp/mcp-manager.d.ts +3 -4
  58. package/dist/types/skills/mcp/mcp-stdio-client.d.ts +1 -1
  59. package/dist/types/skills/{memory-store.d.ts → memory/memory-store.d.ts} +20 -0
  60. package/dist/types/skills/{memory-tool.d.ts → memory/memory-tool.d.ts} +20 -0
  61. package/dist/types/skills/{qmemory-adapter.d.ts → memory/qmemory-adapter.d.ts} +1 -0
  62. package/dist/types/skills/permissions/hook-runner.d.ts +1 -1
  63. package/dist/types/skills/permissions/settings-watcher.d.ts +2 -2
  64. package/dist/types/skills/plugins/plugin-api.d.ts +1 -1
  65. package/dist/types/skills/plugins/plugin-loader.d.ts +1 -4
  66. package/dist/types/skills/plugins/plugin-marketplace.d.ts +1 -1
  67. package/dist/types/skills/portable-tool.d.ts +34 -1
  68. package/dist/types/skills/todo-tool.d.ts +51 -42
  69. package/dist/types/skills/tools/instructions-tool.d.ts +20 -58
  70. package/dist/types/skills/tools/shell/shell-exec.d.ts +2 -0
  71. package/dist/types/skills/tools/skill-invoke-tool.d.ts +2 -2
  72. package/dist/types/skills/tools.d.ts +65 -0
  73. package/package.json +2 -2
  74. package/dist/types/contracts/skill-candidate.d.ts +0 -63
  75. package/dist/types/orchestration/curator-scheduler.d.ts +0 -119
  76. package/dist/types/orchestration/memory-provider.d.ts +0 -14
  77. package/dist/types/orchestration/skill-candidate.d.ts +0 -52
  78. package/dist/types/orchestration/skill-consolidation.d.ts +0 -123
  79. package/dist/types/orchestration/skill-similarity.d.ts +0 -98
  80. package/dist/types/orchestration/team-orchestration.d.ts +0 -195
  81. package/dist/types/orchestration/team-tool-loop-wiring.d.ts +0 -92
  82. package/dist/types/skills/memory-query-tool.d.ts +0 -43
  83. package/dist/types/skills/tool-registry.d.ts +0 -29
  84. package/dist/types/skills/tools/memory-tool.d.ts +0 -74
  85. package/dist/types/skills/tools/pdf-tool.d.ts +0 -66
  86. /package/dist/types/orchestration/{context-collapse.d.ts → context/context-collapse.d.ts} +0 -0
  87. /package/dist/types/orchestration/{context-compression.d.ts → context/context-compression.d.ts} +0 -0
  88. /package/dist/types/orchestration/{reactive-compact.d.ts → context/reactive-compact.d.ts} +0 -0
  89. /package/dist/types/orchestration/{turn-loop-guard.d.ts → context/turn-loop-guard.d.ts} +0 -0
  90. /package/dist/types/orchestration/{error-classification.d.ts → error-handling/error-classification.d.ts} +0 -0
  91. /package/dist/types/orchestration/{failover-classification.d.ts → error-handling/failover-classification.d.ts} +0 -0
  92. /package/dist/types/orchestration/{failover-error.d.ts → error-handling/failover-error.d.ts} +0 -0
  93. /package/dist/types/orchestration/{retry-loop.d.ts → error-handling/retry-loop.d.ts} +0 -0
  94. /package/dist/types/orchestration/{agent-registry.d.ts → subagent/agent-registry.d.ts} +0 -0
  95. /package/dist/types/orchestration/{task-types.d.ts → subagent/task-types.d.ts} +0 -0
  96. /package/dist/types/orchestration/{conversation-repair.d.ts → tool-loop/conversation-repair.d.ts} +0 -0
  97. /package/dist/types/orchestration/{tool-choice-policy.d.ts → tool-loop/tool-choice-policy.d.ts} +0 -0
  98. /package/dist/types/orchestration/{tool-loop-state.d.ts → tool-loop/tool-loop-state.d.ts} +0 -0
  99. /package/dist/types/skills/{memory-extractor.d.ts → memory/memory-extractor.d.ts} +0 -0
  100. /package/dist/types/skills/{skill-frontmatter.d.ts → skill-system/skill-frontmatter.d.ts} +0 -0
  101. /package/dist/types/skills/{skill-guard.d.ts → skill-system/skill-guard.d.ts} +0 -0
  102. /package/dist/types/skills/{skill-loader.d.ts → skill-system/skill-loader.d.ts} +0 -0
  103. /package/dist/types/skills/{skill-source.d.ts → skill-system/skill-source.d.ts} +0 -0
  104. /package/dist/types/skills/{skill-types.d.ts → skill-system/skill-types.d.ts} +0 -0
@@ -1,21 +1,25 @@
1
1
  /**
2
- * StdioServer — JSON-RPC 2.0 over stdio protocol handler.
2
+ * StdioServer — JSON-RPC 2.0 protocol handler.
3
3
  *
4
- * Reads line-delimited JSON from stdin, dispatches to handlers,
5
- * writes JSON-RPC responses/notifications to stdout.
4
+ * Receives JSON-RPC messages from a Transport, dispatches to handlers,
5
+ * writes JSON-RPC responses/notifications back through the Transport.
6
6
  * All log output goes to stderr.
7
7
  *
8
- * Protocol: each line on stdin/stdout is a single JSON-RPC 2.0 message.
8
+ * Protocol: each message is a single JSON-RPC 2.0 object.
9
+ * The physical I/O layer is abstracted via the Transport interface.
9
10
  */
11
+ import type { Transport } from "./transport.js";
10
12
  export interface StdioServerConfig {
11
13
  verbose: boolean;
14
+ transport?: Transport;
12
15
  }
13
16
  export declare class StdioServer {
14
17
  private running;
15
18
  private activeTurn;
16
19
  private verbose;
20
+ private transport;
17
21
  private registry;
18
- private toolRegistry;
22
+ private mediaClient;
19
23
  private agent;
20
24
  private lastLlmConfigKey;
21
25
  private currentSessionId;
@@ -40,15 +44,93 @@ export declare class StdioServer {
40
44
  start(): void;
41
45
  stop(): void;
42
46
  private handleMessage;
43
- private handleHello;
47
+ /**
48
+ * `initialize` — the sole handshake handler.
49
+ * Accepts both legacy flat params (`hostName`, `hostVersion`) and
50
+ * Codex-style nested params (`host.name`, `host.version`).
51
+ * Always returns the InitializeResult shape.
52
+ */
53
+ private handleInitialize;
54
+ /**
55
+ * `thread.create` — create a new thread (session container).
56
+ * Maps threadId → sessionId for the underlying session system.
57
+ */
58
+ private handleThreadCreate;
59
+ /**
60
+ * `thread.list` — list available threads. Delegates to session listing.
61
+ */
62
+ private handleThreadList;
44
63
  private handlePing;
45
64
  private handleAbort;
46
65
  private handleApprovalResponse;
47
- private handleSessionList;
48
66
  private handleSessionResume;
49
67
  private handleTurn;
68
+ private static readonly SUGGESTION_PROMPT;
69
+ /**
70
+ * Generate follow-up suggestions after a turn completes.
71
+ * Fire-and-forget — failures are silently logged. Non-blocking.
72
+ */
73
+ private generateSuggestions;
50
74
  private handleDream;
51
75
  private resolveAgent;
76
+ /**
77
+ * Load LLM settings from ~/.qlogicagent/settings.json (sync).
78
+ * Returns provider/model/apiKey if found, undefined otherwise.
79
+ */
80
+ private loadSettingsSync;
81
+ /**
82
+ * `session.getInfo` — Return session metadata, filesystem paths, and usage summary.
83
+ * Aligns with Codex/Copilot session introspection capability.
84
+ */
85
+ private handleSessionGetInfo;
86
+ /**
87
+ * `memory.list` — Enumerate available memory sources (local store + qmemory).
88
+ */
89
+ private handleMemoryList;
90
+ /**
91
+ * `memory.read` — Read memory content from local store or QMemory.
92
+ */
93
+ private handleMemoryRead;
94
+ /**
95
+ * `memory.write` — Write memory content to local store (agent notes / user profile).
96
+ */
97
+ private handleMemoryWrite;
98
+ /**
99
+ * `tools.list` — Return available tool definitions (local + MCP + plugin).
100
+ * Supports optional category filter.
101
+ */
102
+ private handleToolsList;
103
+ /**
104
+ * `config.get` — Read current agent runtime configuration.
105
+ * Returns merged user-level + project-level settings.
106
+ */
107
+ private handleConfigGet;
108
+ /**
109
+ * `config.update` — Update agent runtime configuration (merges into settings.json).
110
+ */
111
+ private handleConfigUpdate;
112
+ /**
113
+ * `todos.list` — Query current todo items and summary.
114
+ * Invokes the registered todo tool's list action.
115
+ */
116
+ private handleTodosList;
117
+ /**
118
+ * `memory.search` — Vector search via QMemory adapter.
119
+ * Powers the memory page's search feature.
120
+ */
121
+ private handleMemorySearch;
122
+ /**
123
+ * `memory.delete` — Remove a memory entry by substring match (local) or ID (qmemory).
124
+ */
125
+ private handleMemoryDelete;
126
+ /**
127
+ * `tasks.list` — Query running/completed infrastructure tasks.
128
+ */
129
+ private handleTasksList;
130
+ /**
131
+ * `tasks.cancel` — Cancel a running task by ID.
132
+ */
133
+ private handleTasksCancel;
52
134
  private sendResponse;
53
135
  private sendNotification;
54
136
  private writeStdout;
@@ -1,6 +1,13 @@
1
- import { ToolRegistry } from "../skills/tool-registry.js";
1
+ import type { PortableTool } from "../skills/portable-tool.js";
2
2
  import { type ExecProgress } from "../skills/tools/exec-tool.js";
3
3
  import type { AgentLogger } from "../agent/types.js";
4
+ import type { MediaClient } from "../llm/media-client.js";
5
+ /**
6
+ * Set the media client + API keys for generation tools.
7
+ * Tools will call vendor APIs directly through MediaTransport.
8
+ * Keys map: { providerId: apiKey, ... }
9
+ */
10
+ export declare function setMediaClientConfig(client: MediaClient | undefined, apiKeys?: Record<string, string>, onMediaUsage?: (model: string, billingUnit: string, quantity: number) => void): void;
4
11
  export interface BootstrapConfig {
5
12
  workdir?: string;
6
13
  log?: AgentLogger;
@@ -8,12 +15,16 @@ export interface BootstrapConfig {
8
15
  onExecProgress?(progress: ExecProgress): void;
9
16
  }
10
17
  /**
11
- * Create and populate a ToolRegistry with all locally-executable tools.
18
+ * Create all locally-executable tools and install into centralized tool pool.
19
+ *
20
+ * CC parity: getAllBaseTools() returns flat array, installed via setToolPool().
21
+ * Tools execute IN-PROCESS (zero IPC).
12
22
  *
13
- * Tools registered here execute IN-PROCESS (zero IPC).
14
- * All tools are agent-local — no Gateway relay.
23
+ * @alias initToolDeps kept for backward compat with existing call sites.
15
24
  */
16
- export declare function bootstrapToolRegistry(config?: BootstrapConfig): ToolRegistry;
25
+ export declare function getAllBaseTools(config?: BootstrapConfig): PortableTool[];
26
+ /** @deprecated Use getAllBaseTools() instead. */
27
+ export declare const initToolDeps: typeof getAllBaseTools;
17
28
  /**
18
29
  * Update the working directory for all local tool deps.
19
30
  * Called when Gateway sends a new workdir via config.
@@ -0,0 +1,40 @@
1
+ /**
2
+ * Transport — abstract I/O layer for JSON-RPC message exchange.
3
+ *
4
+ * Decouples the agent protocol handler from the physical transport
5
+ * (stdin/stdout, WebSocket, Unix Domain Socket, etc.).
6
+ *
7
+ * Each transport reads incoming JSON-RPC messages and emits them
8
+ * to a registered handler; outgoing messages are sent via send().
9
+ */
10
+ export interface Transport {
11
+ /** Register the message handler (called for each valid parsed message). */
12
+ onMessage(handler: (msg: unknown) => void): void;
13
+ /** Register the close handler (called when the transport closes). */
14
+ onClose(handler: () => void): void;
15
+ /** Send a JSON-RPC message (object will be serialized to JSON). */
16
+ send(msg: unknown): void;
17
+ /** Start listening for incoming messages. */
18
+ start(): void;
19
+ /** Close the transport gracefully. */
20
+ close(): void;
21
+ }
22
+ /**
23
+ * Line-delimited JSON over stdin/stdout.
24
+ * Each line on stdin is a JSON message; each send() writes one JSON line to stdout.
25
+ * All diagnostic logging goes to stderr.
26
+ */
27
+ export declare class StdioTransport implements Transport {
28
+ private messageHandler;
29
+ private closeHandler;
30
+ private verbose;
31
+ constructor(opts?: {
32
+ verbose?: boolean;
33
+ });
34
+ onMessage(handler: (msg: unknown) => void): void;
35
+ onClose(handler: () => void): void;
36
+ send(msg: unknown): void;
37
+ start(): void;
38
+ close(): void;
39
+ private log;
40
+ }
@@ -7,4 +7,3 @@
7
7
  export * from "./planner.js";
8
8
  export * from "./todo.js";
9
9
  export * from "./hooks.js";
10
- export * from "./skill-candidate.js";
@@ -4,11 +4,20 @@ export interface TodoItem {
4
4
  id: number;
5
5
  title: string;
6
6
  status: TodoItemStatus;
7
+ /** Detailed task description (V2). */
8
+ description?: string;
9
+ /** Owner agent/subagent identifier for per-agent isolation (V2). */
10
+ owner?: string;
11
+ /** IDs of tasks that must complete before this one can start (V2). */
12
+ blockedBy?: number[];
13
+ /** IDs of tasks this one blocks from starting (V2). */
14
+ blocks?: number[];
7
15
  }
8
16
  export interface TodoListSummary {
9
17
  total: number;
10
18
  completed: number;
11
19
  inProgress: number;
12
20
  notStarted: number;
21
+ blocked: number;
13
22
  }
14
23
  export declare function summarizeTodoList(items: readonly TodoItem[]): TodoListSummary;
@@ -7,6 +7,9 @@ export { Agent } from "./agent/agent.js";
7
7
  export type { AgentLogger, ChatMessage, ToolDefinition, ToolInvoker, TurnConfig, TurnEvent, TurnRequest, HookRegistry, } from "./agent/types.js";
8
8
  export { parseCliArgs } from "./config/config.js";
9
9
  export type { AgentConfig } from "./config/config.js";
10
+ export type { Transport } from "./cli/transport.js";
11
+ export { StdioTransport } from "./cli/transport.js";
12
+ export { StdioServer, type StdioServerConfig } from "./cli/stdio-server.js";
10
13
  export { ProviderRegistry, createLLMClient, autoDetectProvider, } from "./llm/index.js";
11
14
  export type { LLMTransport, LLMRequest, LLMChunk } from "./llm/index.js";
12
15
  export { createHookRegistry } from "./runtime/hooks/hook-registry.js";
@@ -3,9 +3,11 @@
3
3
  *
4
4
  * Provides: ProviderDef + LLMTransport + ProviderRegistry + LLMClient factory
5
5
  */
6
- export type { ProviderDef, ModelInfo, TransportType, AuthType } from "./provider-def.js";
6
+ export type { ProviderDef, ModelInfo, TransportType, AuthType, MediaCapability } from "./provider-def.js";
7
7
  export type { LLMTransport, LLMRequest, LLMChunk, AccumulatedToolCall, } from "./transport.js";
8
8
  export { accumulateToolCalls } from "./transport.js";
9
+ export type { MediaTransport, MediaRequest, MediaResult, MediaType } from "./media-transport.js";
10
+ export { MediaClient, type MediaClientConfig, type ResolvedMediaModel } from "./media-client.js";
9
11
  export { ProviderRegistry } from "./provider-registry.js";
10
12
  export { BUILTIN_PROVIDERS } from "./builtin-providers.js";
11
13
  export { ModelCatalog } from "./model-catalog.js";
@@ -13,4 +15,8 @@ export type { LLMClientConfig, LLMClient } from "./llm-client.js";
13
15
  export { createLLMClient, autoDetectProvider } from "./llm-client.js";
14
16
  export { OpenAIChatTransport } from "./transports/openai-chat.js";
15
17
  export { AnthropicMessagesTransport } from "./transports/anthropic-messages.js";
18
+ export { VolcengineMediaTransport } from "./transports/volcengine-media.js";
19
+ export { OpenAIMediaTransport } from "./transports/openai-media.js";
20
+ export { MiniMaxMediaTransport } from "./transports/minimax-media.js";
21
+ export { GeminiMediaTransport } from "./transports/gemini-media.js";
16
22
  export { isDebugTransportEnabled, createDebugTransport } from "./debug-transport.js";
@@ -0,0 +1,43 @@
1
+ /**
2
+ * Media Client factory — resolves ProviderDef → creates MediaTransport instances.
3
+ *
4
+ * Parallel to createLLMClient() but for generation models (image/video/music/3D).
5
+ * Uses the same ProviderRegistry to look up provider config, then instantiates
6
+ * the correct media transport adapter.
7
+ *
8
+ * The MediaClient holds a provider-keyed transport cache so that repeated
9
+ * generation calls reuse the same adapter instance.
10
+ */
11
+ import type { MediaTransport } from "./media-transport.js";
12
+ import type { ProviderDef, MediaCapability, ModelInfo } from "./provider-def.js";
13
+ import type { ProviderRegistry } from "./provider-registry.js";
14
+ export interface MediaClientConfig {
15
+ registry: ProviderRegistry;
16
+ }
17
+ export interface ResolvedMediaModel {
18
+ providerId: string;
19
+ providerDef: ProviderDef;
20
+ modelInfo: ModelInfo;
21
+ mediaType: MediaCapability;
22
+ }
23
+ export declare class MediaClient {
24
+ private registry;
25
+ private transports;
26
+ constructor(config: MediaClientConfig);
27
+ /**
28
+ * Find the best available model for a given media type.
29
+ * Scans all providers for models with matching mediaType.
30
+ * If preferredProvider is set, search that first.
31
+ */
32
+ resolveModel(mediaType: MediaCapability, preferredProvider?: string): ResolvedMediaModel | undefined;
33
+ /**
34
+ * Get a MediaTransport for a specific provider.
35
+ * Creates and caches the adapter on first access.
36
+ */
37
+ getTransport(providerId: string): MediaTransport | undefined;
38
+ /**
39
+ * List all available generation models across all providers.
40
+ */
41
+ listMediaModels(mediaType?: MediaCapability): ResolvedMediaModel[];
42
+ private findModelInProvider;
43
+ }
@@ -0,0 +1,80 @@
1
+ /**
2
+ * MediaTransport — transport interface for generation APIs (image, video, music, 3D).
3
+ *
4
+ * Parallel to LLMTransport (chat/reasoning), MediaTransport handles
5
+ * non-chat generation endpoints that each vendor exposes differently:
6
+ * - Sync (OpenAI images, Volcengine Seedream, Gemini generateContent)
7
+ * - Async job (Volcengine Seedance/3D, MiniMax music) — submit → poll → result
8
+ *
9
+ * Each provider adapter implements this interface and hides vendor-specific
10
+ * auth, endpoint paths, request shapes, and polling logic.
11
+ */
12
+ export type MediaType = "image" | "video" | "music" | "tts" | "3d";
13
+ export interface MediaRequest {
14
+ /** Generation model id, e.g. "doubao-seedream-5-0-260128", "gpt-image-2" */
15
+ model: string;
16
+ /** What kind of media to generate */
17
+ mediaType: MediaType;
18
+ /** Text prompt for generation */
19
+ prompt: string;
20
+ /** Optional reference image URL (img2img, i2v, img-to-3d) */
21
+ imageUrl?: string;
22
+ /** Desired dimensions, e.g. "1024x1024" */
23
+ size?: string;
24
+ /** Aspect ratio for video, e.g. "16:9" */
25
+ aspectRatio?: string;
26
+ /** Duration in seconds (video, music) */
27
+ duration?: number;
28
+ /** Number of outputs (image) */
29
+ n?: number;
30
+ /** Visual/musical style */
31
+ style?: string;
32
+ /** Intended use / purpose */
33
+ purpose?: string;
34
+ /** Lyrics for music generation */
35
+ lyrics?: string;
36
+ /** Text for TTS */
37
+ text?: string;
38
+ /** TTS channel hint */
39
+ channel?: string;
40
+ /** Source video URLs for edit/merge operations */
41
+ sourceVideos?: string[];
42
+ /** Reference images for edit operations */
43
+ referenceImages?: string[];
44
+ /** Output resolution for upscale, e.g. "1080p" */
45
+ resolution?: string;
46
+ /** Operation variant: generate (default), edit, merge, upscale */
47
+ operation?: "generate" | "edit" | "merge" | "upscale";
48
+ }
49
+ export interface MediaResult {
50
+ /** URLs of generated media files */
51
+ mediaUrls: string[];
52
+ /** Model actually used */
53
+ model?: string;
54
+ /** Output dimensions / format info */
55
+ size?: string;
56
+ /** Total generation time in ms */
57
+ durationMs?: number;
58
+ /** Billing unit type for non-token models */
59
+ billingUnit?: "per_call" | "per_second" | "per_character" | "per_pixel" | "per_token";
60
+ /** Quantity consumed (seconds, characters, pixels, etc.) */
61
+ billingQuantity?: number;
62
+ /** Provider-specific metadata */
63
+ metadata?: Record<string, unknown>;
64
+ }
65
+ export interface MediaTransport {
66
+ /**
67
+ * Generate media content.
68
+ * Handles sync APIs directly and async job APIs (submit + poll) internally.
69
+ *
70
+ * @param request - Generation parameters
71
+ * @param apiKey - User API key (passed explicitly, not from env)
72
+ * @param signal - Optional abort signal
73
+ */
74
+ generate(request: MediaRequest, apiKey: string, signal?: AbortSignal): Promise<MediaResult>;
75
+ /**
76
+ * Which media types this transport supports.
77
+ * Used by the media client factory to route requests.
78
+ */
79
+ readonly supportedTypes: readonly MediaType[];
80
+ }
@@ -1,13 +1,13 @@
1
1
  /**
2
2
  * ModelCatalog — remote model directory with disk cache + fallback.
3
3
  *
4
- * Fetches model metadata from models.dev (open community catalog, 4000+ models).
5
- * Aligned with Hermes agent/models_dev.py caching strategy.
4
+ * Fetches model metadata from models.dev/api.json (community-maintained,
5
+ * comprehensive single-source catalog with 100+ providers).
6
6
  *
7
- * Three-layer fallback:
7
+ * Lookup order with fallbacks:
8
8
  * 1. In-memory cache (process-level, TTL check against disk mtime)
9
- * 2. Disk cache (~/.openclaw/cache/model_catalog.json)
10
- * 3. Remote fetch (https://models.dev/api.json)
9
+ * 2. Disk cache (~/.qlogicagent/cache/model_catalog.json)
10
+ * 3. Remote fetch (models.dev single endpoint)
11
11
  * 4. Stale disk cache (if remote fails)
12
12
  * 5. Empty (caller falls back to builtin-providers.ts hardcoded)
13
13
  *
@@ -11,6 +11,7 @@
11
11
  */
12
12
  export type TransportType = "openai-chat" | "anthropic-messages";
13
13
  export type AuthType = "bearer" | "x-api-key" | "none";
14
+ export type MediaCapability = "image" | "video" | "music" | "tts" | "3d";
14
15
  export interface ProviderDef {
15
16
  /** Unique provider id, e.g. "deepseek", "openai", "anthropic" */
16
17
  id: string;
@@ -56,4 +57,10 @@ export interface ModelInfo {
56
57
  costInput?: number;
57
58
  /** Cost per 1M output tokens (USD) */
58
59
  costOutput?: number;
60
+ /** Cost per 1M cache read tokens (USD) */
61
+ costCacheRead?: number;
62
+ /** Cost per 1M cache write tokens (USD) */
63
+ costCacheWrite?: number;
64
+ /** Media generation capability — undefined means chat/reasoning model */
65
+ mediaType?: MediaCapability;
59
66
  }
@@ -1,5 +1,5 @@
1
1
  /**
2
- * ProviderRegistry — three-layer merge registry for LLM providers.
2
+ * ProviderRegistry — two-layer merge registry for LLM providers.
3
3
  *
4
4
  * Layer 1: builtin-providers.ts hardcoded (lowest priority, ~20 providers)
5
5
  * Layer 2: model-catalog.ts remote (models.dev — enriches model metadata)
@@ -37,6 +37,8 @@ export type LLMChunk = {
37
37
  promptTokens: number;
38
38
  completionTokens: number;
39
39
  reasoningTokens?: number;
40
+ cacheReadTokens?: number;
41
+ cacheCreationTokens?: number;
40
42
  } | {
41
43
  type: "done";
42
44
  finishReason: string;
@@ -1,31 +1,54 @@
1
1
  /**
2
2
  * Anthropic Messages Transport — SSE streaming for Claude API.
3
3
  *
4
- * POST {baseUrl}/v1/messages with stream: true
5
- * Auth: x-api-key: {apiKey} + anthropic-version header
6
- *
7
- * SSE event types:
8
- * message_start, content_block_start, content_block_delta,
9
- * content_block_stop, message_delta, message_stop
10
- *
11
- * Tool use is via content blocks with type "tool_use" + "input_json_delta".
12
- *
13
- * Aligned with Hermes anthropic_messages.py transport.
4
+ * Aligned with CC (claude-code-haha) src/services/api/claude.ts:
5
+ * - cache_control ephemeral injection on system prompt blocks
6
+ * - ensureToolResultPairing() conversation repair before every request
7
+ * - Retry with exponential backoff on transient errors (429/529/overloaded)
8
+ * - Non-streaming fallback when stream errors out
9
+ * - 90s idle watchdog timeout for silently dropped connections
10
+ * - Adaptive/budget thinking with temperature omit
11
+ * - Cache token extraction with >0 guard (CC updateUsage parity)
12
+ * - signature_delta handling for thinking blocks
14
13
  */
15
14
  import type { LLMChunk, LLMRequest, LLMTransport } from "../transport.js";
16
15
  export interface AnthropicTransportConfig {
17
16
  baseUrl: string;
18
17
  /** anthropic-version header (default "2023-06-01") */
19
18
  apiVersion?: string;
20
- /** Timeout in ms (default 180_000) */
19
+ /** Per-request timeout in ms (default 180_000) */
21
20
  timeoutMs?: number;
21
+ /** Stream idle watchdog timeout in ms (default 90_000, CC parity) */
22
+ streamIdleTimeoutMs?: number;
23
+ /** Enable prompt caching via cache_control ephemeral (default true) */
24
+ enablePromptCaching?: boolean;
25
+ /** Max retry attempts on transient errors (default 3) */
26
+ maxRetries?: number;
22
27
  }
23
28
  export declare class AnthropicMessagesTransport implements LLMTransport {
24
29
  private baseUrl;
25
30
  private apiVersion;
26
31
  private timeoutMs;
32
+ private streamIdleTimeoutMs;
33
+ private enablePromptCaching;
34
+ private maxRetries;
27
35
  constructor(config: AnthropicTransportConfig);
28
36
  stream(request: LLMRequest, apiKey: string, signal?: AbortSignal): AsyncGenerator<LLMChunk>;
37
+ /**
38
+ * Stream with idle watchdog timer (CC parity: 90s default).
39
+ * Throws if no chunks received for streamIdleTimeoutMs.
40
+ */
41
+ private streamWithWatchdog;
42
+ /**
43
+ * Non-streaming fallback (CC executeNonStreamingRequest parity).
44
+ * Used when streaming fails after all retries.
45
+ * Caps max_tokens at 64K and adjusts thinking budget accordingly.
46
+ */
47
+ private nonStreamingFallback;
48
+ /**
49
+ * Convert a non-streaming API response to LLMChunk sequence.
50
+ */
51
+ private mapNonStreamingResponse;
29
52
  private parseSSEStream;
30
53
  private mapEvent;
31
54
  }
@@ -0,0 +1,21 @@
1
+ /**
2
+ * Gemini Media Transport — Image generation via Gemini generateContent.
3
+ *
4
+ * Uses responseModalities: ["TEXT", "IMAGE"] with the Gemini REST API.
5
+ * POST /v1beta/models/{model}:generateContent
6
+ * Auth: key= query param or x-goog-api-key header
7
+ * Docs: https://ai.google.dev/gemini-api/docs/image-generation
8
+ */
9
+ import type { MediaTransport, MediaRequest, MediaResult, MediaType } from "../media-transport.js";
10
+ export interface GeminiMediaConfig {
11
+ /** Base URL, e.g. "https://generativelanguage.googleapis.com/v1beta/openai" */
12
+ baseUrl: string;
13
+ timeoutMs?: number;
14
+ }
15
+ export declare class GeminiMediaTransport implements MediaTransport {
16
+ readonly supportedTypes: readonly MediaType[];
17
+ private apiBase;
18
+ private timeoutMs;
19
+ constructor(config: GeminiMediaConfig);
20
+ generate(request: MediaRequest, apiKey: string, signal?: AbortSignal): Promise<MediaResult>;
21
+ }
@@ -0,0 +1,21 @@
1
+ /**
2
+ * MiniMax Media Transport — Music Generation API (music-2.6, music-cover).
3
+ *
4
+ * POST /v1/music_generation (async job: submit → poll → result)
5
+ * Auth: Authorization: Bearer $MINIMAX_API_KEY
6
+ * Docs: https://platform.minimaxi.com/document/Music
7
+ */
8
+ import type { MediaTransport, MediaRequest, MediaResult, MediaType } from "../media-transport.js";
9
+ export interface MiniMaxMediaConfig {
10
+ /** Base URL, e.g. "https://api.minimaxi.com" */
11
+ baseUrl: string;
12
+ timeoutMs?: number;
13
+ }
14
+ export declare class MiniMaxMediaTransport implements MediaTransport {
15
+ readonly supportedTypes: readonly MediaType[];
16
+ private baseUrl;
17
+ private timeoutMs;
18
+ constructor(config: MiniMaxMediaConfig);
19
+ generate(request: MediaRequest, apiKey: string, signal?: AbortSignal): Promise<MediaResult>;
20
+ private pollTask;
21
+ }
@@ -2,7 +2,7 @@
2
2
  * OpenAI Chat Completions Transport — SSE streaming implementation.
3
3
  *
4
4
  * Covers all OpenAI-compatible providers:
5
- * DeepSeek, Qwen, 硅基, Minimax, Moonshot, Groq, Together, OpenRouter, etc.
5
+ * DeepSeek, Qwen, Minimax, Moonshot, OpenRouter, etc.
6
6
  *
7
7
  * POST {baseUrl}/v1/chat/completions with stream: true
8
8
  * Auth: Authorization: Bearer {apiKey}
@@ -0,0 +1,24 @@
1
+ /**
2
+ * OpenAI Media Transport — Images API (gpt-image-2) + Audio Speech API (tts-1).
3
+ *
4
+ * Image: POST /v1/images/generations (sync response, returns URLs)
5
+ * TTS: POST /v1/audio/speech (sync response, returns raw audio bytes)
6
+ * Auth: Authorization: Bearer $OPENAI_API_KEY
7
+ * Docs: https://platform.openai.com/docs/api-reference/images/create
8
+ * https://platform.openai.com/docs/api-reference/audio/createSpeech
9
+ */
10
+ import type { MediaTransport, MediaRequest, MediaResult, MediaType } from "../media-transport.js";
11
+ export interface OpenAIMediaConfig {
12
+ /** Base URL, e.g. "https://api.openai.com" */
13
+ baseUrl: string;
14
+ timeoutMs?: number;
15
+ }
16
+ export declare class OpenAIMediaTransport implements MediaTransport {
17
+ readonly supportedTypes: readonly MediaType[];
18
+ private baseUrl;
19
+ private timeoutMs;
20
+ constructor(config: OpenAIMediaConfig);
21
+ generate(request: MediaRequest, apiKey: string, signal?: AbortSignal): Promise<MediaResult>;
22
+ private generateImage;
23
+ private generateTTS;
24
+ }
@@ -0,0 +1,25 @@
1
+ /**
2
+ * Qwen (DashScope) Media Transport — TTS via CosyVoice.
3
+ *
4
+ * DashScope TTS uses the async task API:
5
+ * Submit: POST /api/v1/services/aigc/text2audio/generation
6
+ * Poll: GET /api/v1/tasks/{taskId}
7
+ *
8
+ * Auth: Authorization: Bearer $DASHSCOPE_API_KEY
9
+ * Docs: https://help.aliyun.com/zh/model-studio/developer-reference/cosyvoice-large-speech-synthesis
10
+ */
11
+ import type { MediaTransport, MediaRequest, MediaResult, MediaType } from "../media-transport.js";
12
+ export interface QwenMediaConfig {
13
+ /** Base URL, e.g. "https://dashscope.aliyuncs.com" */
14
+ baseUrl: string;
15
+ timeoutMs?: number;
16
+ }
17
+ export declare class QwenMediaTransport implements MediaTransport {
18
+ readonly supportedTypes: readonly MediaType[];
19
+ private baseUrl;
20
+ private timeoutMs;
21
+ constructor(config: QwenMediaConfig);
22
+ generate(request: MediaRequest, apiKey: string, signal?: AbortSignal): Promise<MediaResult>;
23
+ private pollTask;
24
+ private extractAudioUrl;
25
+ }
@@ -0,0 +1,34 @@
1
+ /**
2
+ * Volcengine Media Transport — Doubao Seedream (image), Seedance (video), 3D generation.
3
+ *
4
+ * API reference:
5
+ * Image: POST /v3/images/generations (sync)
6
+ * Video: POST /v3/contents/generations/tasks (async job)
7
+ * 3D: POST /v3/3d-contents/generations/tasks (async job)
8
+ *
9
+ * Auth: Authorization: Bearer $ARK_API_KEY
10
+ * Docs: https://www.volcengine.com/docs/82379/1330310
11
+ */
12
+ import type { MediaTransport, MediaRequest, MediaResult, MediaType } from "../media-transport.js";
13
+ export interface VolcengineMediaConfig {
14
+ /** Base URL, e.g. "https://ark.cn-beijing.volces.com/api" */
15
+ baseUrl: string;
16
+ timeoutMs?: number;
17
+ }
18
+ export declare class VolcengineMediaTransport implements MediaTransport {
19
+ readonly supportedTypes: readonly MediaType[];
20
+ private baseUrl;
21
+ private timeoutMs;
22
+ constructor(config: VolcengineMediaConfig);
23
+ generate(request: MediaRequest, apiKey: string, signal?: AbortSignal): Promise<MediaResult>;
24
+ /**
25
+ * Check if this transport can handle a given operation.
26
+ * Video edit/merge/upscale are routed through the same video endpoint.
27
+ */
28
+ canHandle(request: MediaRequest): boolean;
29
+ private generateImage;
30
+ private generateVideo;
31
+ private generate3D;
32
+ private submitTask;
33
+ private pollTask;
34
+ }