@canonmsg/codex-plugin 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/adapter.d.ts CHANGED
@@ -63,7 +63,7 @@ export declare class CodexConversationAdapter {
63
63
  setModel(model: string | null): void;
64
64
  isRunning(): boolean;
65
65
  interrupt(): Promise<void>;
66
- runTurn(prompt: string, onEvent: (event: CodexEvent) => void, onLog?: (line: string) => void): Promise<CodexTurnResult>;
66
+ runTurn(prompt: string, onEvent: (event: CodexEvent) => void, onLog?: (line: string) => void, imagePaths?: readonly string[]): Promise<CodexTurnResult>;
67
67
  private buildArgs;
68
68
  private clearActiveProcess;
69
69
  }
package/dist/adapter.js CHANGED
@@ -47,11 +47,11 @@ export class CodexConversationAdapter {
47
47
  this.child.kill('SIGKILL');
48
48
  }, 5_000);
49
49
  }
50
- async runTurn(prompt, onEvent, onLog) {
50
+ async runTurn(prompt, onEvent, onLog, imagePaths = []) {
51
51
  if (this.child) {
52
52
  throw new Error('A Codex turn is already in progress for this conversation');
53
53
  }
54
- const args = this.buildArgs(prompt);
54
+ const args = this.buildArgs(prompt, imagePaths);
55
55
  const child = spawn(this.codexBin, args, {
56
56
  cwd: this.cwd,
57
57
  stdio: ['ignore', 'pipe', 'pipe'],
@@ -141,7 +141,7 @@ export class CodexConversationAdapter {
141
141
  });
142
142
  });
143
143
  }
144
- buildArgs(prompt) {
144
+ buildArgs(prompt, imagePaths = []) {
145
145
  if (this.threadId) {
146
146
  const args = ['exec', 'resume', '--json', '--skip-git-repo-check'];
147
147
  if (this.model) {
@@ -159,6 +159,9 @@ export class CodexConversationAdapter {
159
159
  if (this.bypassApprovalsAndSandbox) {
160
160
  args.push('--dangerously-bypass-approvals-and-sandbox');
161
161
  }
162
+ for (const imagePath of imagePaths) {
163
+ args.push('-i', imagePath);
164
+ }
162
165
  args.push(this.threadId, prompt);
163
166
  return args;
164
167
  }
@@ -190,6 +193,9 @@ export class CodexConversationAdapter {
190
193
  if (execMode.bypassApprovalsAndSandbox) {
191
194
  args.push('--dangerously-bypass-approvals-and-sandbox');
192
195
  }
196
+ for (const imagePath of imagePaths) {
197
+ args.push('-i', imagePath);
198
+ }
193
199
  args.push(prompt);
194
200
  return args;
195
201
  }
package/dist/host-runtime.d.ts CHANGED
@@ -49,7 +49,26 @@ export declare function buildCanonHostPrompt(input: {
49
49
  workSessions?: MessageCreatedPayload['workSessions'];
50
50
  buildInboundContextLines: (context: HostInboundParticipantContext) => string[];
51
51
  }): string;
52
- export declare function renderCanonHostInboundContent(message: HostInboundMessage): string;
52
+ /**
53
+ * Render the **text portion** of an inbound Canon message. Images are
54
+ * referenced by short placeholders — their actual bytes are delivered to the
55
+ * host as native vision/media inputs (Codex `-i <file>`, Anthropic image
56
+ * blocks). URLs are intentionally *not* inlined, since the harness never
57
+ * needs to refetch and earlier `[Image: <url>]` inlining caused vision
58
+ * models to see a string about an image instead of the image itself.
59
+ *
60
+ * `materialized` may be passed so non-image attachments can reference a
61
+ * local path the agent can Read. Without it we fall back to an unadorned
62
+ * placeholder; the vision path still works because image args carry the
63
+ * file path directly.
64
+ */
65
+ export declare function renderCanonHostInboundContent(message: HostInboundMessage, materialized?: ReadonlyArray<{
66
+ kind: 'image' | 'audio' | 'file';
67
+ path: string;
68
+ fileName?: string;
69
+ durationMs?: number;
70
+ index: number;
71
+ }>): string;
53
72
  export declare function buildHydratedInboundContext(input: {
54
73
  agentId: string;
55
74
  conversation: CanonConversation | null;
package/dist/host-runtime.js CHANGED
@@ -32,38 +32,56 @@ export function buildCanonHostPrompt(input) {
32
32
  input.content,
33
33
  ].join('\n');
34
34
  }
35
- export function renderCanonHostInboundContent(message) {
36
- let content = message.text || '';
37
- const attachment = message.attachments?.[0];
38
- if (attachment?.kind === 'audio' && attachment.url) {
39
- const duration = attachment.durationMs ? ` (${Math.round(attachment.durationMs / 1000)}s)` : '';
40
- content = content
41
- ? `[Voice message${duration}: ${attachment.url}]\n${content}`
42
- : `[Voice message${duration}: ${attachment.url}]`;
43
- }
44
- else if (attachment?.kind === 'image' && attachment.url) {
45
- content = content
46
- ? `[Image: ${attachment.url}]\n${content}`
47
- : `[Image: ${attachment.url}]`;
48
- }
49
- else if (attachment?.kind === 'file' && attachment.url) {
50
- const label = attachment.fileName || 'File';
51
- content = content
52
- ? `[File: ${label} ${attachment.url}]\n${content}`
53
- : `[File: ${label} ${attachment.url}]`;
35
+ /**
36
+ * Render the **text portion** of an inbound Canon message. Images are
37
+ * referenced by short placeholders — their actual bytes are delivered to the
38
+ * host as native vision/media inputs (Codex `-i <file>`, Anthropic image
39
+ * blocks). URLs are intentionally *not* inlined, since the harness never
40
+ * needs to refetch and earlier `[Image: <url>]` inlining caused vision
41
+ * models to see a string about an image instead of the image itself.
42
+ *
43
+ * `materialized` may be passed so non-image attachments can reference a
44
+ * local path the agent can Read. Without it we fall back to an unadorned
45
+ * placeholder; the vision path still works because image args carry the
46
+ * file path directly.
47
+ */
48
+ export function renderCanonHostInboundContent(message, materialized) {
49
+ const body = message.text || '';
50
+ const placeholders = [];
51
+ const attachments = message.attachments ?? [];
52
+ if (attachments.length > 0) {
53
+ for (let i = 0; i < attachments.length; i += 1) {
54
+ const att = attachments[i];
55
+ const mat = materialized?.find((m) => m.index === i) ?? null;
56
+ placeholders.push(describeAttachment(att, mat));
57
+ }
54
58
  }
55
59
  else if (message.contentType === 'audio' && message.audioUrl) {
56
- const duration = message.audioDurationMs ? ` (${Math.round(message.audioDurationMs / 1000)}s)` : '';
57
- content = content
58
- ? `[Voice message${duration}: ${message.audioUrl}]\n${content}`
59
- : `[Voice message${duration}: ${message.audioUrl}]`;
60
+ const duration = message.audioDurationMs
61
+ ? ` (${Math.round(message.audioDurationMs / 1000)}s)`
62
+ : '';
63
+ placeholders.push(`[Voice message${duration}]`);
60
64
  }
61
65
  else if (message.contentType === 'image' && message.imageUrl) {
62
- content = content
63
- ? `[Image: ${message.imageUrl}]\n${content}`
64
- : `[Image: ${message.imageUrl}]`;
66
+ placeholders.push('[Image attached]');
67
+ }
68
+ const rendered = [...placeholders, body].filter(Boolean).join('\n');
69
+ return rendered || '[Empty message]';
70
+ }
71
+ function describeAttachment(attachment, materialized) {
72
+ if (attachment.kind === 'image') {
73
+ return '[Image attached]';
74
+ }
75
+ if (attachment.kind === 'audio') {
76
+ const durationMs = materialized?.durationMs ?? attachment.durationMs;
77
+ const duration = durationMs ? ` (${Math.round(durationMs / 1000)}s)` : '';
78
+ const ref = materialized?.path ? ` ${materialized.path}` : '';
79
+ return `[Voice message${duration}${ref}]`;
65
80
  }
66
- return content || '[Empty message]';
81
+ // file
82
+ const label = materialized?.fileName ?? attachment.fileName ?? 'File';
83
+ const ref = materialized?.path ? ` ${materialized.path}` : '';
84
+ return `[File: ${label}${ref}]`;
67
85
  }
68
86
  export function buildHydratedInboundContext(input) {
69
87
  const history = buildParticipationHistorySnapshot(input.page?.messages ?? [], input.agentId);
package/dist/host.js CHANGED
@@ -3,7 +3,8 @@ import { setDefaultResultOrder } from 'node:dns';
3
3
  setDefaultResultOrder('ipv4first');
4
4
  import { randomUUID } from 'node:crypto';
5
5
  import { parseArgs } from 'node:util';
6
- import { buildConfiguredWorkspaceOptions, buildPublicWorkspaceOptions, ExecutionEnvironmentError, isEnabledFlag, CanonClient, CanonStream, clearSessionState, clearTurnState, DEFAULT_PARTICIPATION_HISTORY_FETCH_LIMIT, DEFAULT_RUNTIME_CAPABILITIES, FINAL_MESSAGE_HANDOFF_MS, getActiveProfile, initRTDBAuth, normalizeTurnMetadata, normalizeTurnState, prepareConversationEnvironment, releaseLock, releaseConversationEnvironment, resolveCanonAgent, rtdbRead, rtdbWrite, shouldTriggerAgentTurn, writeSessionState, writeTurnState, } from '@canonmsg/core';
6
+ import { getCodexImagePath, materializeMessageMedia, } from '@canonmsg/agent-sdk';
7
+ import { buildConfiguredWorkspaceOptions, buildPublicWorkspaceOptions, EXECUTION_ENVIRONMENT_MODES, ExecutionEnvironmentError, isEnabledFlag, CanonClient, CanonStream, clearSessionState, clearTurnState, DEFAULT_PARTICIPATION_HISTORY_FETCH_LIMIT, DEFAULT_RUNTIME_CAPABILITIES, FINAL_MESSAGE_HANDOFF_MS, getActiveProfile, initRTDBAuth, normalizeTurnMetadata, normalizeTurnState, prepareConversationEnvironment, releaseLock, releaseConversationEnvironment, resolveCanonAgent, rtdbRead, rtdbWrite, shouldTriggerAgentTurn, writeSessionState, writeTurnState, } from '@canonmsg/core';
7
8
  import { buildCanonHostPrompt, buildHydratedInboundContext, createConversationMetadataLoader, loadHostSessionConfig, publishHostAgentRuntime, renderCanonHostInboundContent, resolveHostWorkspaceCwd, } from './host-runtime.js';
8
9
  import { buildInboundContextLines, decideAutoReply, } from './inbound-policy.js';
9
10
  import { CodexConversationAdapter, } from './adapter.js';
@@ -43,6 +44,14 @@ async function publishAgentRuntime(agentId, runtime) {
43
44
  async function loadSessionConfig(conversationId, agentId) {
44
45
  return loadHostSessionConfig({ conversationId, agentId });
45
46
  }
47
+ const SESSION_EXECUTION_MODE_REQUIRED = 'Session execution mode required; please select a mode before starting the session.';
48
+ function requireSessionExecutionMode(config) {
49
+ const mode = config?.executionMode;
50
+ if (!mode) {
51
+ throw new ExecutionEnvironmentError(SESSION_EXECUTION_MODE_REQUIRED, SESSION_EXECUTION_MODE_REQUIRED);
52
+ }
53
+ return mode;
54
+ }
46
55
  function resolveWorkspaceCwd(config) {
47
56
  return resolveHostWorkspaceCwd({
48
57
  workspaceOptions,
@@ -57,8 +66,8 @@ function buildCanonPrompt(input) {
57
66
  ...input,
58
67
  });
59
68
  }
60
- function renderInboundContent(message) {
61
- return renderCanonHostInboundContent(message);
69
+ function renderInboundContent(message, materialized) {
70
+ return renderCanonHostInboundContent(message, materialized);
62
71
  }
63
72
  function summarizeCommand(command) {
64
73
  const trimmed = command.trim();
@@ -241,12 +250,16 @@ async function main() {
241
250
  }
242
251
  const creation = (async () => {
243
252
  const config = await loadSessionConfig(conversationId, agentId);
253
+ const sessionExecutionMode = requireSessionExecutionMode(config);
254
+ if (sessionExecutionMode === 'worktree' && !allowWorktrees) {
255
+ throw new ExecutionEnvironmentError('This host does not allow worktree sessions (launched without --enable-worktrees).', 'This Canon host was started without worktree isolation enabled. Choose "Lock the workspace" or restart the host with --enable-worktrees.');
256
+ }
244
257
  const workspaceCwd = resolveWorkspaceCwd(config);
245
258
  const environment = prepareConversationEnvironment({
246
259
  agentId,
247
260
  conversationId,
248
261
  workspaceCwd,
249
- allowWorktrees,
262
+ allowWorktrees: sessionExecutionMode === 'worktree',
250
263
  });
251
264
  try {
252
265
  const sessionCwd = environment.cwd;
@@ -303,8 +316,8 @@ async function main() {
303
316
  pendingSessionCreations.delete(conversationId);
304
317
  }
305
318
  }
306
- function enqueuePrompt(session, prompt, intent = 'queue', toFront = false, sourceMessageId, markAccepted = false) {
307
- const nextPrompt = { prompt, intent, sourceMessageId, markAccepted };
319
+ function enqueuePrompt(session, prompt, intent = 'queue', toFront = false, sourceMessageId, markAccepted = false, imagePaths = []) {
320
+ const nextPrompt = { prompt, intent, sourceMessageId, markAccepted, imagePaths };
308
321
  if (toFront) {
309
322
  session.queue.unshift(nextPrompt);
310
323
  }
@@ -316,7 +329,25 @@ async function main() {
316
329
  void runNextTurn(session);
317
330
  }
318
331
  async function enqueueInboundMessage(input) {
319
- const content = renderInboundContent(input.message);
332
+ let materialized = [];
333
+ if (input.message.id) {
334
+ try {
335
+ materialized = await materializeMessageMedia({
336
+ id: input.message.id,
337
+ attachments: input.message.attachments,
338
+ imageUrl: input.message.imageUrl ?? null,
339
+ audioUrl: input.message.audioUrl ?? null,
340
+ audioDurationMs: input.message.audioDurationMs ?? null,
341
+ }, { agentId, conversationId: input.conversationId });
342
+ }
343
+ catch (error) {
344
+ console.error(`[canon-codex] [${input.conversationId.slice(0, 8)}] Failed to materialize media:`, error instanceof Error ? error.message : error);
345
+ }
346
+ }
347
+ const imagePaths = materialized
348
+ .map((attachment) => getCodexImagePath(attachment))
349
+ .filter((path) => path !== null);
350
+ const content = renderInboundContent(input.message, materialized);
320
351
  const hydrated = await loadHydratedInboundContext({
321
352
  conversationId: input.conversationId,
322
353
  message: input.message,
@@ -360,14 +391,14 @@ async function main() {
360
391
  workSessions,
361
392
  });
362
393
  if (session.running && deliveryIntent === 'interrupt') {
363
- enqueuePrompt(session, prompt, deliveryIntent, true, input.message.id, shouldMarkAccepted);
394
+ enqueuePrompt(session, prompt, deliveryIntent, true, input.message.id, shouldMarkAccepted, imagePaths);
364
395
  console.error(`[canon-codex] [${input.conversationId.slice(0, 8)}] Interrupting current turn for explicit human send-now`);
365
396
  await session.adapter.interrupt().catch(() => { });
366
397
  clearStreaming(input.conversationId);
367
398
  client.setTyping(input.conversationId, false).catch(() => { });
368
399
  return;
369
400
  }
370
- enqueuePrompt(session, prompt, deliveryIntent, false, input.message.id, shouldMarkAccepted);
401
+ enqueuePrompt(session, prompt, deliveryIntent, false, input.message.id, shouldMarkAccepted, imagePaths);
371
402
  }
372
403
  async function runNextTurn(session) {
373
404
  if (session.running || session.closed)
@@ -393,6 +424,7 @@ async function main() {
393
424
  updatedAt: { '.sv': 'timestamp' },
394
425
  }).catch(() => { });
395
426
  try {
427
+ const turnImagePaths = nextTurn.imagePaths ?? [];
396
428
  const result = await session.adapter.runTurn(nextTurn.prompt, (event) => {
397
429
  session.lastActivity = Date.now();
398
430
  if (event.type === 'thread.started') {
@@ -426,7 +458,7 @@ async function main() {
426
458
  }
427
459
  }, (line) => {
428
460
  console.error(`[canon-codex] [${session.conversationId.slice(0, 8)}] ${line}`);
429
- });
461
+ }, turnImagePaths);
430
462
  if (result.threadId) {
431
463
  saveStoredThreadId(agentId, session.conversationId, session.cwd, result.threadId);
432
464
  }
@@ -503,10 +535,14 @@ async function main() {
503
535
  }
504
536
  let controlStopped = false;
505
537
  let streamConnected = false;
538
+ const hostAvailableExecutionModes = allowWorktrees
539
+ ? [...EXECUTION_ENVIRONMENT_MODES]
540
+ : ['locked'];
506
541
  let runtimeDescriptor = {
507
542
  defaultWorkspaceId: workspaceOptions[0]?.id,
508
543
  ...(typeof args.model === 'string' ? { defaultModel: args.model } : {}),
509
544
  availableWorkspaces: buildPublicWorkspaceOptions(workspaceOptions),
545
+ availableExecutionModes: hostAvailableExecutionModes,
510
546
  };
511
547
  const publishRuntimeHeartbeat = async () => {
512
548
  if (!streamConnected)
@@ -550,12 +586,14 @@ async function main() {
550
586
  defaultWorkspaceId: workspaceOptions[0]?.id,
551
587
  ...(typeof args.model === 'string' ? { defaultModel: args.model } : {}),
552
588
  availableWorkspaces: buildPublicWorkspaceOptions(workspaceOptions),
589
+ availableExecutionModes: hostAvailableExecutionModes,
553
590
  };
554
591
  }
555
592
  catch {
556
593
  runtimeDescriptor = {
557
594
  defaultWorkspaceId: workspaceOptions[0]?.id,
558
595
  availableWorkspaces: buildPublicWorkspaceOptions(workspaceOptions),
596
+ availableExecutionModes: hostAvailableExecutionModes,
559
597
  };
560
598
  }
561
599
  try {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@canonmsg/codex-plugin",
3
- "version": "0.5.0",
3
+ "version": "0.6.0",
4
4
  "description": "Canon host integration for Codex CLI",
5
5
  "type": "module",
6
6
  "main": "dist/host.js",
@@ -22,6 +22,7 @@
22
22
  "prepack": "npm run build"
23
23
  },
24
24
  "dependencies": {
25
+ "@canonmsg/agent-sdk": "^0.8.0",
25
26
  "@canonmsg/core": "^0.7.0"
26
27
  },
27
28
  "engines": {