converse-mcp-server 2.22.1 → 2.22.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "converse-mcp-server",
3
- "version": "2.22.1",
3
+ "version": "2.22.2",
4
4
  "description": "Converse MCP Server - Converse with other LLMs with chat and consensus tools",
5
5
  "type": "module",
6
6
  "main": "src/index.js",
@@ -25,7 +25,7 @@ const SUPPORTED_MODELS = {
25
25
  contextWindow: 400000,
26
26
  maxOutputTokens: 128000,
27
27
  supportsStreaming: true,
28
- supportsImages: false, // Codex doesn't support images
28
+ supportsImages: true, // Codex SDK 0.118+ supports images via --image (local_image input)
29
29
  supportsTemperature: false, // Codex manages temperature internally
30
30
  supportsWebSearch: false, // Codex accesses files directly, not web
31
31
  timeout: 600000, // 10 minutes
@@ -91,14 +91,19 @@ async function getCodexSDK() {
91
91
  }
92
92
 
93
93
  /**
94
- * Convert message array to single prompt for Codex
95
- * Codex expects single prompts, not message history
94
+ * Convert message array to Codex SDK Input (string | UserInput[])
95
+ * Codex expects single prompts (new thread) or incremental input (resumed thread);
96
+ * history is managed SDK-side.
96
97
  *
97
- * Strategy:
98
- * - For new threads: Extract last user message only
99
- * - For resumed threads: Same - Codex maintains history internally
98
+ * Returns a plain string when the last user message is text-only, or an array
99
+ * of { type: 'text' | 'local_image' } parts when images are present. The SDK
100
+ * passes local_image paths to the CLI via --image.
101
+ *
102
+ * Images must be on-disk files — Converse stores the original path in
103
+ * metadata.path (chat.js / consensus.js set includeMetadata: true). Images
104
+ * without a path (e.g. pasted base64 with no metadata) are skipped.
100
105
  */
101
- function convertMessagesToPrompt(messages) {
106
+ function convertMessagesToCodexInput(messages) {
102
107
  if (!Array.isArray(messages)) {
103
108
  throw new CodexProviderError(
104
109
  'Messages must be an array',
@@ -113,7 +118,6 @@ function convertMessagesToPrompt(messages) {
113
118
  );
114
119
  }
115
120
 
116
- // Find last user message
117
121
  const lastUserMessage = messages.filter((m) => m.role === 'user').pop();
118
122
 
119
123
  if (!lastUserMessage) {
@@ -123,28 +127,46 @@ function convertMessagesToPrompt(messages) {
123
127
  );
124
128
  }
125
129
 
126
- // Extract text content from message
127
130
  if (typeof lastUserMessage.content === 'string') {
128
131
  return lastUserMessage.content;
129
132
  }
130
133
 
131
- // Handle array content (multimodal format)
132
134
  if (Array.isArray(lastUserMessage.content)) {
133
- const textParts = lastUserMessage.content
134
- .filter((item) => item.type === 'text')
135
- .map((item) => item.text);
135
+ const parts = [];
136
+ let droppedImages = 0;
137
+ for (const item of lastUserMessage.content) {
138
+ if (item.type === 'text' && item.text) {
139
+ parts.push({ type: 'text', text: item.text });
140
+ } else if (item.type === 'image') {
141
+ const imagePath = item.metadata?.path || item.metadata?.originalPath;
142
+ if (imagePath) {
143
+ parts.push({ type: 'local_image', path: imagePath });
144
+ } else {
145
+ droppedImages += 1;
146
+ }
147
+ }
148
+ }
136
149
 
137
- // Log warning if images present (Codex doesn't support images)
138
- const hasImages = lastUserMessage.content.some(
139
- (item) => item.type === 'image',
140
- );
141
- if (hasImages) {
150
+ if (droppedImages > 0) {
142
151
  debugLog(
143
- '[Codex] Warning: Images in message will be ignored (Codex does not support multimodal input)',
152
+ `[Codex] Skipped ${droppedImages} image(s) without a file path Codex requires on-disk images`,
144
153
  );
145
154
  }
146
155
 
147
- return textParts.join('\n');
156
+ if (parts.length === 0) {
157
+ throw new CodexProviderError(
158
+ 'Message contained no usable text or image parts',
159
+ ErrorCodes.INVALID_MESSAGES,
160
+ );
161
+ }
162
+
163
+ // Collapse to plain string when there are no images — keeps the non-image
164
+ // path identical to the legacy behavior and slightly simpler for the SDK.
165
+ if (parts.every((p) => p.type === 'text')) {
166
+ return parts.map((p) => p.text).join('\n');
167
+ }
168
+
169
+ return parts;
148
170
  }
149
171
 
150
172
  throw new CodexProviderError(
@@ -153,6 +175,18 @@ function convertMessagesToPrompt(messages) {
153
175
  );
154
176
  }
155
177
 
178
+ /**
179
+ * Extract the combined text from a Codex SDK Input for prompt-based checks
180
+ * like $imagegen detection.
181
+ */
182
+ function extractPromptText(input) {
183
+ if (typeof input === 'string') return input;
184
+ return input
185
+ .filter((p) => p.type === 'text')
186
+ .map((p) => p.text)
187
+ .join('\n\n');
188
+ }
189
+
156
190
  /**
157
191
  * Get thread ID from continuation metadata
158
192
  * Codex thread IDs are stored in continuation store for resumption
@@ -185,12 +219,14 @@ function mapReasoningEffort(effort) {
185
219
  }
186
220
 
187
221
  /**
188
- * Create stream generator for Codex streaming responses
189
- * Yields raw Codex SDK events that will be normalized by ProviderStreamNormalizer
222
+ * Create stream generator for Codex streaming responses.
223
+ * `input` is the Codex SDK Input (string | UserInput[]) — strings for plain
224
+ * text turns, arrays when images are attached.
225
+ * Yields raw Codex SDK events that will be normalized by ProviderStreamNormalizer.
190
226
  */
191
- async function* createStreamingGenerator(thread, prompt, signal) {
227
+ async function* createStreamingGenerator(thread, input, signal) {
192
228
  try {
193
- const { events } = await thread.runStreamed(prompt, { signal });
229
+ const { events } = await thread.runStreamed(input, { signal });
194
230
 
195
231
  for await (const event of events) {
196
232
  // Check for cancellation
@@ -256,8 +292,9 @@ export const codexProvider = {
256
292
  // Get Codex SDK
257
293
  const Codex = await getCodexSDK();
258
294
 
259
- // Convert messages to prompt
260
- const prompt = convertMessagesToPrompt(messages);
295
+ // Convert messages to Codex SDK input (string or structured parts with images)
296
+ const input = convertMessagesToCodexInput(messages);
297
+ const promptText = extractPromptText(input);
261
298
 
262
299
  // Get thread ID if resuming conversation
263
300
  const threadId =
@@ -289,7 +326,7 @@ export const codexProvider = {
289
326
  // into image generation via $imagegen — otherwise Codex can't save the
290
327
  // generated file. Leave higher modes (workspace-write, danger-full-access)
291
328
  // alone so an explicit user choice is never downgraded or escalated.
292
- const wantsImageGen = /\$imagegen\b/i.test(prompt);
329
+ const wantsImageGen = /\$imagegen\b/i.test(promptText);
293
330
  const sandboxMode =
294
331
  wantsImageGen && configuredSandboxMode === 'read-only'
295
332
  ? 'workspace-write'
@@ -324,12 +361,12 @@ export const codexProvider = {
324
361
  // WORKAROUND: SDK's thread.run() hangs due to missing break after turn.completed
325
362
  // Always use streaming internally, consume synchronously when stream=false
326
363
  if (stream) {
327
- return createStreamingGenerator(thread, prompt, signal);
364
+ return createStreamingGenerator(thread, input, signal);
328
365
  }
329
366
 
330
367
  // Synchronous mode: consume streaming internally and return complete response
331
368
  const startTime = Date.now();
332
- const generator = createStreamingGenerator(thread, prompt, signal);
369
+ const generator = createStreamingGenerator(thread, input, signal);
333
370
 
334
371
  let content = '';
335
372
  let usage = null;