@adhdev/daemon-core 0.9.76-rc.61 → 0.9.76-rc.62

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -10,8 +10,8 @@ import * as path from 'path';
10
10
  import * as crypto from 'crypto';
11
11
  import * as fs from 'fs';
12
12
  import { createRequire } from 'node:module';
13
- import { normalizeInputEnvelope, type ProviderModule, flattenContent } from './contracts.js';
14
- import { assertTextOnlyInput } from './provider-input-support.js';
13
+ import { normalizeInputEnvelope, type ProviderModule, flattenContent, type InputEnvelope, type InputPart } from './contracts.js';
14
+ import { assertProviderSupportsDeclaredInput, getEffectiveMessageInputSupport } from './provider-input-support.js';
15
15
  import type { ProviderInstance, ProviderState, ProviderEvent, InstanceContext, ProviderErrorReason, HotChatSessionState, SessionModalState } from './provider-instance.js';
16
16
  import { ProviderCliAdapter } from '../cli-adapters/provider-cli-adapter.js';
17
17
  import type { CliProviderModule } from '../cli-adapters/provider-cli-adapter.js';
@@ -35,6 +35,95 @@ type PersistableCliHistoryMessage = {
35
35
  receivedAt?: number;
36
36
  };
37
37
 
38
/**
 * Maps lowercase image MIME types to the file extension used when inline
 * image data is written to disk.
 *
 * The table is built on a null-prototype object so that lookups with
 * arbitrary, caller-supplied keys (for example `constructor` or `toString`)
 * yield `undefined` instead of an inherited `Object.prototype` member.
 */
const IMAGE_MIME_EXTENSIONS: Record<string, string> = Object.assign(
  Object.create(null) as Record<string, string>,
  {
    'image/png': '.png',
    'image/jpeg': '.jpg',
    'image/jpg': '.jpg',
    'image/gif': '.gif',
    'image/webp': '.webp',
    'image/bmp': '.bmp',
    'image/tiff': '.tiff',
    'image/svg+xml': '.svg',
  },
);
48
+
49
/**
 * Resolves a URI to a local filesystem path when it denotes one.
 *
 * - `file://` URIs are parsed and percent-decoded; if parsing or decoding
 *   fails, the text after the `file://` prefix is returned verbatim.
 * - Absolute filesystem paths are returned unchanged.
 * - Anything else (empty string, relative path, other schemes) yields `null`.
 *
 * @param uri URI or path supplied with an input part.
 * @returns The local path, or `null` when `uri` does not refer to a local file.
 */
function filePathFromUri(uri: string): string | null {
  if (!uri) return null;
  if (uri.startsWith('file://')) {
    let decoded: string;
    try {
      decoded = decodeURIComponent(new URL(uri).pathname);
    } catch {
      // Malformed URL or percent-escape: best effort, drop only the scheme prefix.
      return uri.slice('file://'.length);
    }
    // The URL parser reports Windows drive paths as "/C:/dir/file"; the leading
    // slash is not part of a usable Windows path, so strip it on that platform.
    if (process.platform === 'win32' && /^\/[A-Za-z]:[\\/]/.test(decoded)) {
      return decoded.slice(1);
    }
    return decoded;
  }
  return path.isAbsolute(uri) ? uri : null;
}
61
+
62
/**
 * Picks the file extension for an image MIME type.
 *
 * The MIME type is reduced to its essence before the lookup: parameters after
 * `;` are dropped, surrounding whitespace is trimmed and the value is
 * lowercased, so `"Image/PNG; charset=binary"` resolves like `"image/png"`.
 * Only the table's own entries count as matches.
 *
 * @param mimeType MIME type declared on the image part.
 * @returns The mapped extension (with leading dot), or `.img` when unknown.
 */
function extensionForImageMime(mimeType: string): string {
  const essence = (mimeType.split(';', 1)[0] ?? '').trim().toLowerCase();
  // Own-property check so keys such as "constructor" never resolve to an inherited member.
  const isKnown = Object.prototype.hasOwnProperty.call(IMAGE_MIME_EXTENSIONS, essence);
  return (isKnown && IMAGE_MIME_EXTENSIONS[essence]) || '.img';
}
65
+
66
/**
 * Builds a file name for a materialized input image.
 *
 * The name has the form `adhdev-input-image-<ms timestamp>-<index>-<12 hex chars><ext>`,
 * where the hex suffix comes from six random bytes and the extension is derived
 * from the MIME type.
 *
 * @param index Position of the image part within the input envelope.
 * @param mimeType MIME type used to choose the extension.
 */
function safeInputImageBasename(index: number, mimeType: string): string {
  const ext = extensionForImageMime(mimeType);
  const nonce = crypto.randomBytes(6).toString('hex');
  const stem = ['adhdev-input-image', Date.now(), index, nonce].join('-');
  return stem + ext;
}
71
+
72
/**
 * Writes the inline base64 payload of an image part to a new file in `dir`.
 *
 * `part.data` may be raw base64 or a data URL; for the latter only the text
 * after the last comma is decoded. The directory is created on demand.
 *
 * Because the payload is user content and the default target lives under the
 * shared temp directory, the directory is created owner-only (0700), the file
 * is written owner-only (0600), and the write uses exclusive-create so an
 * existing entry at that path is never followed or overwritten.
 *
 * @param part Image input part carrying optional inline data.
 * @param index Position of the part in the envelope (used in the file name).
 * @param dir Directory that receives the file.
 * @returns Path of the written file, or `null` when there is no data or it
 *          decodes to zero bytes.
 * @throws Propagates filesystem errors from `mkdirSync` / `writeFileSync`.
 */
function materializeImageDataPart(part: Extract<InputPart, { type: 'image' }>, index: number, dir: string): string | null {
  if (!part.data) return null;
  const lastComma = part.data.lastIndexOf(',');
  const rawData = lastComma >= 0 ? part.data.slice(lastComma + 1) : part.data;
  if (!rawData) return null;

  const bytes = Buffer.from(rawData, 'base64');
  // Buffer.from silently skips invalid base64; do not leave an empty file behind.
  if (bytes.length === 0) return null;

  fs.mkdirSync(dir, { recursive: true, mode: 0o700 });
  const filePath = path.join(dir, safeInputImageBasename(index, part.mimeType));
  fs.writeFileSync(filePath, bytes, { mode: 0o600, flag: 'wx' });
  return filePath;
}
81
+
82
/**
 * Renders a structured input envelope as a single newline-separated prompt
 * string for a CLI provider.
 *
 * Output order is: image references, then text (text parts, image alt text
 * and the envelope's text fallback), then resource descriptions. Blank and
 * exactly repeated entries are dropped, keeping the first occurrence.
 *
 * Image parts resolve to a reference in this order: a local path taken from a
 * `file://` or absolute-path `uri`; otherwise a file materialized from inline
 * `data`; otherwise the raw `uri`. Audio and video parts add nothing of their
 * own; they can only reach the prompt through `textFallback`.
 *
 * `textFallback` is appended only when it contributes at least one line that
 * the parts have not already produced. Without this check, an envelope whose
 * fallback is simply its parts joined together is repeated in the prompt.
 *
 * @param input Normalized input envelope.
 * @param options.materializeDir Directory for materialized inline images;
 *        defaults to `adhdev-input-media` under the OS temp directory.
 * @returns The prompt text, or an empty string when nothing usable was found.
 */
export function buildCliStructuredInputPrompt(
  input: InputEnvelope,
  options: { materializeDir?: string } = {},
): string {
  const promptParts: string[] = [];
  const imageRefs: string[] = [];
  const resourceRefs: string[] = [];
  // Placeholder lines a flattened fallback may contain for images we already reference by path/URI.
  const imagePlaceholders: string[] = [];
  const materializeDir = options.materializeDir || path.join(os.tmpdir(), 'adhdev-input-media');

  input.parts.forEach((part, index) => {
    switch (part.type) {
      case 'text': {
        const text = part.text.trim();
        if (text) promptParts.push(text);
        return;
      }
      case 'image': {
        const localPath = typeof part.uri === 'string' ? filePathFromUri(part.uri) : null;
        const materializedPath = !localPath && part.data ? materializeImageDataPart(part, index, materializeDir) : null;
        const ref = localPath || materializedPath || part.uri || '';
        if (ref) {
          imageRefs.push(ref);
          imagePlaceholders.push(`[image: ${part.mimeType}]`);
        }
        const alt = part.alt?.trim();
        if (alt) promptParts.push(alt);
        return;
      }
      case 'resource_link':
        resourceRefs.push([part.title, part.name, part.description, part.uri].filter(Boolean).join('\n'));
        return;
      case 'resource':
        resourceRefs.push([part.name, part.text, part.uri].filter(Boolean).join('\n'));
        return;
      default:
        return;
    }
  });

  const fallback = (input.textFallback ?? '').trim();
  if (fallback) {
    const coveredLines = new Set<string>();
    for (const entry of [...promptParts, ...resourceRefs, ...imagePlaceholders]) {
      for (const line of entry.split('\n')) {
        const trimmed = line.trim();
        if (trimmed) coveredLines.add(trimmed);
      }
    }
    // Keep the fallback intact, but only when it says something the parts did not.
    const addsNewContent = fallback.split('\n').some((line) => {
      const trimmed = line.trim();
      return trimmed.length > 0 && !coveredLines.has(trimmed);
    });
    if (addsNewContent) promptParts.push(fallback);
  }

  const seen = new Set<string>();
  const ordered = [...imageRefs, ...promptParts, ...resourceRefs].filter((value) => {
    if (value.trim().length === 0 || seen.has(value)) return false;
    seen.add(value);
    return true;
  });

  return ordered.join('\n');
}
126
+
38
127
  function normalizePersistableCliHistoryContent(content: unknown): string {
39
128
  return flattenContent(content as any).replace(/\s+/g, ' ').trim();
40
129
  }
@@ -476,6 +565,7 @@ export class CliProviderInstance implements ProviderInstance {
476
565
  resume: this.provider.resume,
477
566
  controlValues: surface.controlValues,
478
567
  providerControls: this.provider.controls,
568
+ messageInput: getEffectiveMessageInputSupport(this.provider),
479
569
  summaryMetadata: surface.summaryMetadata as any,
480
570
  errorMessage: this.errorMessage,
481
571
  errorReason: this.errorReason,
@@ -532,9 +622,10 @@ export class CliProviderInstance implements ProviderInstance {
532
622
  onEvent(event: string, data?: any): void {
533
623
  if (event === 'send_message') {
534
624
  const input = normalizeInputEnvelope(data);
535
- assertTextOnlyInput(this.provider, input);
536
- if (input.textFallback) {
537
- void this.adapter.sendMessage(input.textFallback).catch((e: any) => {
625
+ assertProviderSupportsDeclaredInput(this.provider, input);
626
+ const promptText = buildCliStructuredInputPrompt(input);
627
+ if (promptText) {
628
+ void this.adapter.sendMessage(promptText).catch((e: any) => {
538
629
  LOG.warn('CLI', `[${this.type}] send_message failed: ${e?.message || e}`);
539
630
  });
540
631
  }
@@ -128,6 +128,7 @@ export type ContentBlock =
128
128
  | TextBlock
129
129
  | ImageBlock
130
130
  | AudioBlock
131
+ | VideoBlock
131
132
  | ResourceLinkBlock
132
133
  | ResourceBlock;
133
134
 
@@ -144,6 +145,7 @@ export interface ImageBlock {
144
145
  data: string; // base64-encoded
145
146
  mimeType: string; // 'image/png', 'image/jpeg', etc.
146
147
  uri?: string; // optional URL reference
148
+ alt?: string;
147
149
  annotations?: ContentAnnotations;
148
150
  }
149
151
 
@@ -152,6 +154,19 @@ export interface AudioBlock {
152
154
  type: 'audio';
153
155
  data: string; // base64-encoded
154
156
  mimeType: string;
157
+ uri?: string;
158
+ transcript?: string;
159
+ annotations?: ContentAnnotations;
160
+ }
161
+
162
+ /** Video content — ADHDev canonical display block. ACP prompt input degrades video to resource_link/text. */
163
+ export interface VideoBlock {
164
+ type: 'video';
165
+ data?: string; // base64-encoded
166
+ mimeType: string;
167
+ uri?: string;
168
+ transcript?: string;
169
+ posterUri?: string;
155
170
  annotations?: ContentAnnotations;
156
171
  }
157
172
 
@@ -595,7 +610,16 @@ export interface ProviderModule {
595
610
  // ─── Contract version / capability declaration ───
596
611
  contractVersion?: number;
597
612
  capabilities?: {
598
- input?: { multipart?: boolean; mediaTypes?: Array<'text' | 'image' | 'audio' | 'video' | 'resource'> };
613
+ input?: {
614
+ multipart?: boolean;
615
+ mediaTypes?: Array<'text' | 'image' | 'audio' | 'video' | 'resource'>;
616
+ strategies?: Array<{
617
+ mediaType: 'text' | 'image' | 'audio' | 'video' | 'resource';
618
+ strategies?: Array<'native' | 'native_acp' | 'resource_link' | 'text_fallback' | 'paste' | 'upload'>;
619
+ native?: boolean;
620
+ degradation?: Array<'native' | 'native_acp' | 'resource_link' | 'text_fallback' | 'paste' | 'upload'>;
621
+ }>;
622
+ };
599
623
  output?: { richContent?: boolean; mediaTypes?: Array<'text' | 'image' | 'audio' | 'video' | 'resource'> };
600
624
  controls?: { typedResults?: boolean };
601
625
  };
@@ -5,6 +5,7 @@ export type InputPart =
5
5
  | ImageInputPart
6
6
  | AudioInputPart
7
7
  | VideoInputPart
8
+ | ResourceLinkInputPart
8
9
  | ResourceInputPart
9
10
 
10
11
  export interface TextInputPart {
@@ -33,6 +34,7 @@ export interface VideoInputPart {
33
34
  mimeType: string
34
35
  uri?: string
35
36
  data?: string
37
+ transcript?: string
36
38
  posterUri?: string
37
39
  }
38
40
 
@@ -45,6 +47,17 @@ export interface ResourceInputPart {
45
47
  data?: string
46
48
  }
47
49
 
50
+ export interface ResourceLinkInputPart {
51
+ type: 'resource_link'
52
+ uri: string
53
+ name: string
54
+ title?: string
55
+ description?: string
56
+ mimeType?: string
57
+ size?: number
58
+ annotations?: ContentAnnotations
59
+ }
60
+
48
61
  export interface InputEnvelope {
49
62
  parts: InputPart[]
50
63
  textFallback: string
@@ -73,6 +86,7 @@ export interface ImageMessagePart {
73
86
  mimeType: string
74
87
  uri?: string
75
88
  data?: string
89
+ alt?: string
76
90
  annotations?: ContentAnnotations
77
91
  }
78
92
 
@@ -90,6 +104,7 @@ export interface VideoMessagePart {
90
104
  mimeType: string
91
105
  uri?: string
92
106
  data?: string
107
+ transcript?: string
93
108
  posterUri?: string
94
109
  annotations?: ContentAnnotations
95
110
  }
@@ -98,8 +113,11 @@ export interface ResourceLinkMessagePart {
98
113
  type: 'resource_link'
99
114
  uri: string
100
115
  name: string
116
+ title?: string
117
+ description?: string
101
118
  mimeType?: string
102
119
  size?: number
120
+ annotations?: ContentAnnotations
103
121
  }
104
122
 
105
123
  export interface ResourceMessagePart {
@@ -149,6 +167,10 @@ export function flattenMessageParts(parts: MessagePart[]): string {
149
167
  .map((part) => {
150
168
  if (part.type === 'text') return part.text
151
169
  if (part.type === 'resource') return part.resource.text || ''
170
+ if (part.type === 'image') return part.alt || (part.data ? `[image: ${part.mimeType}]` : '')
171
+ if (part.type === 'audio') return part.transcript || (part.data ? `[audio: ${part.mimeType}]` : '')
172
+ if (part.type === 'video') return part.transcript || (part.data ? `[video: ${part.mimeType}]` : '')
173
+ if (part.type === 'resource_link') return [part.name, part.description].filter(Boolean).join('\n')
152
174
  return ''
153
175
  })
154
176
  .filter((value) => value.length > 0)
@@ -247,6 +269,7 @@ function normalizeInputPartObject(raw: Record<string, unknown>): InputPart | nul
247
269
  mimeType: raw.mimeType,
248
270
  ...(typeof raw.uri === 'string' ? { uri: raw.uri } : {}),
249
271
  ...(typeof raw.data === 'string' ? { data: raw.data } : {}),
272
+ ...(typeof raw.transcript === 'string' ? { transcript: raw.transcript } : {}),
250
273
  ...(typeof raw.posterUri === 'string' ? { posterUri: raw.posterUri } : {}),
251
274
  }
252
275
  }
@@ -262,10 +285,14 @@ function normalizeInputPartObject(raw: Record<string, unknown>): InputPart | nul
262
285
  }
263
286
  if (type === 'resource_link' && typeof raw.uri === 'string') {
264
287
  return {
265
- type: 'resource',
288
+ type,
266
289
  uri: raw.uri,
290
+ name: typeof raw.name === 'string' ? raw.name : getUriDisplayName(raw.uri, 'resource'),
291
+ ...(typeof raw.title === 'string' ? { title: raw.title } : {}),
292
+ ...(typeof raw.description === 'string' ? { description: raw.description } : {}),
267
293
  ...(typeof raw.mimeType === 'string' ? { mimeType: raw.mimeType } : {}),
268
- ...(typeof raw.name === 'string' ? { name: raw.name } : {}),
294
+ ...(typeof raw.size === 'number' && Number.isFinite(raw.size) ? { size: raw.size } : {}),
295
+ ...normalizeAnnotationsProperty(raw.annotations),
269
296
  }
270
297
  }
271
298
  return null
@@ -282,6 +309,7 @@ function normalizeMessagePartObject(raw: Record<string, unknown>): MessagePart |
282
309
  mimeType: raw.mimeType,
283
310
  ...(typeof raw.uri === 'string' ? { uri: raw.uri } : {}),
284
311
  ...(typeof raw.data === 'string' ? { data: raw.data } : {}),
312
+ ...(typeof raw.alt === 'string' ? { alt: raw.alt } : {}),
285
313
  }
286
314
  }
287
315
  if (type === 'audio' && typeof raw.mimeType === 'string') {
@@ -299,6 +327,7 @@ function normalizeMessagePartObject(raw: Record<string, unknown>): MessagePart |
299
327
  mimeType: raw.mimeType,
300
328
  ...(typeof raw.uri === 'string' ? { uri: raw.uri } : {}),
301
329
  ...(typeof raw.data === 'string' ? { data: raw.data } : {}),
330
+ ...(typeof raw.transcript === 'string' ? { transcript: raw.transcript } : {}),
302
331
  ...(typeof raw.posterUri === 'string' ? { posterUri: raw.posterUri } : {}),
303
332
  }
304
333
  }
@@ -307,8 +336,11 @@ function normalizeMessagePartObject(raw: Record<string, unknown>): MessagePart |
307
336
  type,
308
337
  uri: raw.uri,
309
338
  name: raw.name,
339
+ ...(typeof raw.title === 'string' ? { title: raw.title } : {}),
340
+ ...(typeof raw.description === 'string' ? { description: raw.description } : {}),
310
341
  ...(typeof raw.mimeType === 'string' ? { mimeType: raw.mimeType } : {}),
311
- ...(typeof raw.size === 'number' ? { size: raw.size } : {}),
342
+ ...(typeof raw.size === 'number' && Number.isFinite(raw.size) ? { size: raw.size } : {}),
343
+ ...normalizeAnnotationsProperty(raw.annotations),
312
344
  }
313
345
  }
314
346
  if (type === 'resource' && raw.resource && typeof raw.resource === 'object') {
@@ -331,10 +363,36 @@ function flattenInputParts(parts: InputPart[]): string {
331
363
  return parts
332
364
  .map((part) => {
333
365
  if (part.type === 'text') return part.text
334
- if (part.type === 'audio') return part.transcript || ''
335
- if (part.type === 'resource') return part.text || ''
366
+ if (part.type === 'image') return part.alt || (part.data ? `[image: ${part.mimeType}]` : '')
367
+ if (part.type === 'audio') return part.transcript || (part.data ? `[audio: ${part.mimeType}]` : '')
368
+ if (part.type === 'video') return part.transcript || (part.data ? `[video: ${part.mimeType}]` : '')
369
+ if (part.type === 'resource_link') return [part.title, part.name, part.description, part.uri].filter(Boolean).join('\n')
370
+ if (part.type === 'resource') return part.text || part.name || part.uri
336
371
  return ''
337
372
  })
338
373
  .filter((value) => value.length > 0)
339
374
  .join('\n')
340
375
  }
376
+
377
/**
 * Derives a short human-readable name from a URI or filesystem path.
 *
 * For `scheme://` URIs the last path segment is used, percent-decoded and
 * without query string or fragment; when the path has no segment the host is
 * used instead. For everything else (POSIX or Windows paths, bare names) the
 * last `/`- or `\`-separated segment is returned as-is.
 *
 * @param uri URI or path to name.
 * @param fallback Value returned when no usable segment can be found.
 */
function getUriDisplayName(uri: string, fallback: string): string {
  const lastSegment = (value: string): string | undefined => value.split(/[\\/]/).filter(Boolean).pop()
  const decodeSafely = (value: string): string => {
    try {
      return decodeURIComponent(value)
    } catch {
      // Malformed percent-escape: show the raw segment rather than failing.
      return value
    }
  }

  // Require "://" so Windows drive paths such as "C:\dir\file" are not parsed as URLs.
  if (/^[a-z][a-z0-9+.-]*:\/\//i.test(uri)) {
    try {
      const parsed = new URL(uri)
      const segment = lastSegment(parsed.pathname)
      if (segment) return decodeSafely(segment)
      return parsed.host || fallback
    } catch {
      // Not a parseable URL; fall through to plain path handling.
    }
  }
  return lastSegment(uri) || fallback
}
385
+
386
/**
 * Sanitizes an untrusted `annotations` value into a spreadable property bag.
 *
 * Only two fields survive: `audience`, reduced to its `'user'` / `'assistant'`
 * entries (kept only if at least one remains), and `priority`, kept only when
 * it is a finite number. If neither survives, an empty object is returned so
 * that spreading the result adds no `annotations` key.
 */
function normalizeAnnotationsProperty(value: unknown): { annotations?: ContentAnnotations } {
  if (typeof value !== 'object' || value === null) return {}

  const { audience, priority } = value as Record<string, unknown>
  const cleaned: ContentAnnotations = {}

  if (Array.isArray(audience)) {
    const roles = audience.filter((role): role is 'user' | 'assistant' => role === 'user' || role === 'assistant')
    if (roles.length > 0) cleaned.audience = roles
  }

  if (typeof priority === 'number' && Number.isFinite(priority)) {
    cleaned.priority = priority
  }

  if (Object.keys(cleaned).length === 0) return {}
  return { annotations: cleaned }
}
@@ -1,8 +1,38 @@
1
1
  import type { InputEnvelope, ProviderModule } from './contracts.js'
2
2
 
3
- type InputMediaType = 'text' | 'image' | 'audio' | 'video' | 'resource'
3
+ export type InputMediaType = 'text' | 'image' | 'audio' | 'video' | 'resource'
4
+
5
+ export type InputAttachmentStrategy =
6
+ | 'native'
7
+ | 'native_acp'
8
+ | 'resource_link'
9
+ | 'text_fallback'
10
+ | 'paste'
11
+ | 'upload'
12
+
13
+ export interface InputMediaStrategyDescriptor {
14
+ mediaType: InputMediaType
15
+ strategies: InputAttachmentStrategy[]
16
+ native?: boolean
17
+ degradation?: InputAttachmentStrategy[]
18
+ }
19
+
20
+ export interface MessageInputSupport {
21
+ text: boolean
22
+ multipart: boolean
23
+ mediaTypes: InputMediaType[]
24
+ strategies: InputMediaStrategyDescriptor[]
25
+ }
4
26
 
5
27
  const VALID_INPUT_MEDIA_TYPES = new Set<InputMediaType>(['text', 'image', 'audio', 'video', 'resource'])
28
+ const VALID_INPUT_STRATEGIES = new Set<InputAttachmentStrategy>(['native', 'native_acp', 'resource_link', 'text_fallback', 'paste', 'upload'])
29
+
30
/**
 * Shared default describing a text-only message input: no multipart, `text`
 * as the only media type, and no strategy descriptors.
 *
 * The value is a module-level singleton handed out by reference, so it is
 * frozen together with its nested arrays; `Object.freeze` alone is shallow and
 * would leave `mediaTypes` / `strategies` mutable for every holder.
 */
export const TEXT_ONLY_MESSAGE_INPUT_SUPPORT: MessageInputSupport = Object.freeze<MessageInputSupport>({
  text: true,
  multipart: false,
  mediaTypes: Object.freeze<InputMediaType[]>(['text']) as InputMediaType[],
  strategies: Object.freeze<InputMediaStrategyDescriptor[]>([]) as InputMediaStrategyDescriptor[],
})
6
36
 
7
37
  function getProviderLabel(provider?: Pick<ProviderModule, 'name' | 'type'> | null): string {
8
38
  return provider?.name || provider?.type || 'This provider'
@@ -44,14 +74,108 @@ export function assertTextOnlyInput(provider: Pick<ProviderModule, 'name' | 'typ
44
74
  export function getDeclaredProviderInputSupport(provider?: Pick<ProviderModule, 'capabilities'> | null): {
45
75
  multipart: boolean
46
76
  mediaTypes: Set<InputMediaType>
77
+ strategies: InputMediaStrategyDescriptor[]
47
78
  } {
48
79
  const rawMediaTypes = Array.isArray(provider?.capabilities?.input?.mediaTypes)
49
80
  ? provider?.capabilities?.input?.mediaTypes.filter((type): type is InputMediaType => VALID_INPUT_MEDIA_TYPES.has(type as InputMediaType))
50
81
  : []
82
+ const strategies = normalizeInputStrategyDescriptors(provider?.capabilities?.input?.strategies)
51
83
 
52
84
  return {
53
85
  multipart: provider?.capabilities?.input?.multipart === true,
54
86
  mediaTypes: new Set<InputMediaType>(rawMediaTypes.length > 0 ? rawMediaTypes : ['text']),
87
+ strategies,
88
+ }
89
+ }
90
+
91
/**
 * Validates an untrusted list of per-media-type strategy descriptors.
 *
 * Non-array input yields an empty list. An entry is kept only when it is an
 * object with a recognized `mediaType` and at least one recognized value in
 * `strategies` or `degradation`; unrecognized strategy values are filtered
 * out. `native` is copied only when boolean, and `degradation` only when
 * non-empty.
 */
export function normalizeInputStrategyDescriptors(raw: unknown): InputMediaStrategyDescriptor[] {
  if (!Array.isArray(raw)) return []

  // Keeps the recognized strategy names of a candidate list; anything else becomes [].
  const recognizedStrategies = (candidate: unknown): InputAttachmentStrategy[] =>
    Array.isArray(candidate)
      ? candidate.filter((value): value is InputAttachmentStrategy => typeof value === 'string' && VALID_INPUT_STRATEGIES.has(value as InputAttachmentStrategy))
      : []

  const descriptors: InputMediaStrategyDescriptor[] = []
  raw.forEach((entry: unknown) => {
    if (!entry || typeof entry !== 'object') return
    const { mediaType, strategies, degradation, native } = entry as Record<string, unknown>
    if (typeof mediaType !== 'string' || !VALID_INPUT_MEDIA_TYPES.has(mediaType as InputMediaType)) return

    const primary = recognizedStrategies(strategies)
    const fallbacks = recognizedStrategies(degradation)
    if (primary.length === 0 && fallbacks.length === 0) return

    const descriptor: InputMediaStrategyDescriptor = {
      mediaType: mediaType as InputMediaType,
      strategies: primary,
    }
    if (typeof native === 'boolean') descriptor.native = native
    if (fallbacks.length > 0) descriptor.degradation = fallbacks
    descriptors.push(descriptor)
  })
  return descriptors
}
115
+
116
/**
 * Extracts the `image`, `audio` and `embeddedContext` flags from
 * `runtimeCapabilities.promptCapabilities`.
 *
 * A flag is reported as true only when the stored value is exactly `true`;
 * missing capabilities or any other value give false.
 */
function promptCapabilityFlags(runtimeCapabilities?: Record<string, any> | null): { image: boolean; audio: boolean; embeddedContext: boolean } {
  const declared = runtimeCapabilities?.promptCapabilities || {}
  const isEnabled = (flag: unknown): boolean => flag === true
  const image = isEnabled(declared.image)
  const audio = isEnabled(declared.audio)
  const embeddedContext = isEnabled(declared.embeddedContext)
  return { image, audio, embeddedContext }
}
124
+
125
/**
 * Converts a provider's declared input support into the `MessageInputSupport`
 * shape: `text` is always true, the media-type set becomes an array, and the
 * multipart flag and strategy list are passed through.
 */
function supportFromDeclared(provider?: Pick<ProviderModule, 'capabilities'> | null): MessageInputSupport {
  const { multipart, mediaTypes, strategies } = getDeclaredProviderInputSupport(provider)
  return {
    text: true,
    multipart,
    mediaTypes: [...mediaTypes],
    strategies,
  }
}
134
+
135
/**
 * Computes the message input support a session should advertise.
 *
 * For providers whose category is not `acp`, the declared support is returned
 * as a copy (media types, descriptors and their strategy lists are cloned).
 *
 * For `acp` providers the result is derived from the declaration plus the
 * runtime prompt capabilities:
 * - `text` is always present;
 * - `resource`, `video`, `image` and `audio` are included, in that order,
 *   only when declared;
 * - `native_acp` is offered (and `native` is true) only when the matching
 *   runtime flag is set: `embeddedContext` for resources, `image` for images,
 *   `audio` for audio. Video is never native;
 * - every included type degrades through `resource_link`, then `text_fallback`;
 * - `multipart` requires both a multipart declaration and at least one
 *   non-text media type.
 *
 * @param provider Provider module (category and declared capabilities).
 * @param runtimeCapabilities Runtime capability bag; only `promptCapabilities` is read.
 */
export function getEffectiveMessageInputSupport(
  provider?: Pick<ProviderModule, 'category' | 'capabilities'> | null,
  runtimeCapabilities?: Record<string, any> | null,
): MessageInputSupport {
  const declared = supportFromDeclared(provider)

  if (provider?.category !== 'acp') {
    const clonedStrategies = declared.strategies.map((descriptor) => {
      const clone: InputMediaStrategyDescriptor = { ...descriptor, strategies: [...descriptor.strategies] }
      if (descriptor.degradation) clone.degradation = [...descriptor.degradation]
      return clone
    })
    return { ...declared, mediaTypes: [...declared.mediaTypes], strategies: clonedStrategies }
  }

  const caps = promptCapabilityFlags(runtimeCapabilities)
  // Evaluation order matters: it fixes the order of mediaTypes and strategies in the result.
  const candidates: Array<[InputMediaType, boolean]> = [
    ['resource', caps.embeddedContext],
    ['video', false],
    ['image', caps.image],
    ['audio', caps.audio],
  ]

  const mediaTypes = new Set<InputMediaType>(['text'])
  const strategies: InputMediaStrategyDescriptor[] = []
  for (const [mediaType, native] of candidates) {
    if (!declared.mediaTypes.includes(mediaType)) continue
    mediaTypes.add(mediaType)
    const offered: InputAttachmentStrategy[] = native
      ? ['native_acp', 'resource_link', 'text_fallback']
      : ['resource_link', 'text_fallback']
    strategies.push({
      mediaType,
      strategies: offered,
      native,
      degradation: ['resource_link', 'text_fallback'],
    })
  }

  return {
    text: true,
    multipart: declared.multipart && mediaTypes.size > 1,
    mediaTypes: Array.from(mediaTypes),
    strategies,
  }
}
57
181
 
@@ -10,6 +10,7 @@
10
10
 
11
11
  import type { ProviderModule, ProviderSettingDef, ProviderResumeCapability } from './contracts.js';
12
12
  import type { AcpConfigOption, AcpMode, ProviderControlSchema, ProviderSummaryMetadata, SessionCapability } from '../shared-types.js';
13
+ import type { MessageInputSupport } from './provider-input-support.js';
13
14
  import type { ChatMessage } from '../types.js';
14
15
 
15
16
  // ─── ProviderState — Discriminated union by category ─────────────
@@ -87,6 +88,7 @@ interface ProviderStateBase {
87
88
  runtime?: ProviderRuntimeInfo;
88
89
  resume?: ProviderResumeCapability;
89
90
  sessionCapabilities?: SessionCapability[];
91
+ messageInput?: MessageInputSupport;
90
92
  /** Dynamic control current values */
91
93
  controlValues?: Record<string, string | number | boolean>;
92
94
  /** Provider-declared controls schema (from provider.controls) */
@@ -2,6 +2,7 @@ import type { ProviderControlDef, ProviderControlType, ProviderModule } from './
2
2
  import { providerHasOpenPanelSupport } from './open-panel-support.js'
3
3
 
4
4
  const VALID_CAPABILITY_MEDIA_TYPES = new Set(['text', 'image', 'audio', 'video', 'resource'])
5
+ const VALID_INPUT_STRATEGIES = new Set(['native', 'native_acp', 'resource_link', 'text_fallback', 'paste', 'upload'])
5
6
 
6
7
  const KNOWN_PROVIDER_FIELDS = new Set<string>([
7
8
  'type',
@@ -158,16 +159,45 @@ function validateCapabilities(provider: ProviderModule, controls: ProviderContro
158
159
  }
159
160
 
160
161
  const input = capabilities.input
161
- if (!input || typeof input !== 'object') {
162
- errors.push('capabilities.input is required')
163
- } else {
164
- if (typeof input.multipart !== 'boolean') {
162
+ if (input !== undefined) {
163
+ if (!input || typeof input !== 'object') {
164
+ errors.push('capabilities.input must be an object when provided')
165
+ } else if (typeof input.multipart !== 'boolean') {
165
166
  errors.push('capabilities.input.multipart must be boolean')
166
167
  }
167
- if (!Array.isArray(input.mediaTypes) || input.mediaTypes.length === 0) {
168
- errors.push('capabilities.input.mediaTypes must be a non-empty array')
169
- } else if (input.mediaTypes.some((type) => typeof type !== 'string' || !VALID_CAPABILITY_MEDIA_TYPES.has(type))) {
170
- errors.push(`capabilities.input.mediaTypes must only include: ${Array.from(VALID_CAPABILITY_MEDIA_TYPES).join(', ')}`)
168
+ if (input && typeof input === 'object') {
169
+ const mediaTypes = Array.isArray(input.mediaTypes) ? input.mediaTypes : undefined
170
+ if (!mediaTypes || mediaTypes.length === 0) {
171
+ errors.push('capabilities.input.mediaTypes must be a non-empty array')
172
+ } else if (mediaTypes.some((type) => typeof type !== 'string' || !VALID_CAPABILITY_MEDIA_TYPES.has(type))) {
173
+ errors.push(`capabilities.input.mediaTypes must only include: ${Array.from(VALID_CAPABILITY_MEDIA_TYPES).join(', ')}`)
174
+ }
175
+ }
176
+ if (input && typeof input === 'object' && input.strategies !== undefined) {
177
+ if (!Array.isArray(input.strategies)) {
178
+ errors.push('capabilities.input.strategies must be an array when provided')
179
+ } else {
180
+ for (const strategy of input.strategies) {
181
+ if (!strategy || typeof strategy !== 'object' || Array.isArray(strategy)) {
182
+ errors.push('capabilities.input.strategies entries must be objects')
183
+ continue
184
+ }
185
+ const entry = strategy as Record<string, unknown>
186
+ if (typeof entry.mediaType !== 'string' || !VALID_CAPABILITY_MEDIA_TYPES.has(entry.mediaType)) {
187
+ errors.push(`capabilities.input.strategies.mediaType must only include: ${Array.from(VALID_CAPABILITY_MEDIA_TYPES).join(', ')}`)
188
+ }
189
+ for (const field of ['strategies', 'degradation'] as const) {
190
+ const values = entry[field]
191
+ if (values === undefined) continue
192
+ if (!Array.isArray(values) || values.some((value) => typeof value !== 'string' || !VALID_INPUT_STRATEGIES.has(value))) {
193
+ errors.push(`capabilities.input.strategies.${field} must only include: ${Array.from(VALID_INPUT_STRATEGIES).join(', ')}`)
194
+ }
195
+ }
196
+ if (entry.native !== undefined && typeof entry.native !== 'boolean') {
197
+ errors.push('capabilities.input.strategies.native must be boolean when provided')
198
+ }
199
+ }
200
+ }
171
201
  }
172
202
  }
173
203
 
@@ -336,7 +336,9 @@ export type SessionCapability =
336
336
  | 'mark_notification_unread';
337
337
 
338
338
  import type { RuntimeWriteOwner, RuntimeAttachedClient, SessionStatus } from './shared-types-extra.js';
339
+ import type { MessageInputSupport } from './providers/provider-input-support.js';
339
340
  export type { RuntimeWriteOwner, RuntimeAttachedClient, SessionStatus } from './shared-types-extra.js';
341
+ export type { MessageInputSupport, InputMediaStrategyDescriptor, InputAttachmentStrategy, InputMediaType } from './providers/provider-input-support.js';
340
342
 
341
343
  export interface SessionEntry {
342
344
  id: string;
@@ -364,6 +366,8 @@ export interface SessionEntry {
364
366
  resume?: ProviderResumeCapability;
365
367
  activeChat: SessionActiveChatData | null;
366
368
  capabilities?: SessionCapability[];
369
+ /** Effective message input/media support for this session. Defaults fail-closed to text-only. */
370
+ messageInput?: MessageInputSupport;
367
371
  cdpConnected?: boolean;
368
372
  /** Dynamic control current values (generic key-value) */
369
373
  controlValues?: Record<string, string | number | boolean>;
@@ -30,6 +30,7 @@ import {
30
30
  IDE_PROVIDER_SESSION_CAPABILITIES_BASE,
31
31
  EXTENSION_PROVIDER_SESSION_CAPABILITIES_BASE,
32
32
  } from '../providers/open-panel-support.js';
33
+ import { TEXT_ONLY_MESSAGE_INPUT_SUPPORT } from '../providers/provider-input-support.js';
33
34
 
34
35
  export type SessionEntryProfile = 'full' | 'live' | 'metadata';
35
36
 
@@ -189,7 +190,7 @@ function buildIdeWorkspaceSession(
189
190
  ...(git && { git }),
190
191
  activeChat,
191
192
  ...(summaryMetadata && { summaryMetadata }),
192
- ...(includeSessionMetadata && { capabilities: state.sessionCapabilities || IDE_SESSION_CAPABILITIES }),
193
+ ...(includeSessionMetadata && { capabilities: state.sessionCapabilities || IDE_SESSION_CAPABILITIES, messageInput: state.messageInput || TEXT_ONLY_MESSAGE_INPUT_SUPPORT }),
193
194
  cdpConnected: state.cdpConnected ?? isCdpConnected(cdpManagers, state.type),
194
195
  ...(includeSessionControls && {
195
196
  ...(controlValues && { controlValues }),
@@ -229,7 +230,7 @@ function buildExtensionAgentSession(
229
230
  ...(git && { git }),
230
231
  activeChat,
231
232
  ...(summaryMetadata && { summaryMetadata }),
232
- ...(includeSessionMetadata && { capabilities: ext.sessionCapabilities || EXTENSION_SESSION_CAPABILITIES }),
233
+ ...(includeSessionMetadata && { capabilities: ext.sessionCapabilities || EXTENSION_SESSION_CAPABILITIES, messageInput: ext.messageInput || TEXT_ONLY_MESSAGE_INPUT_SUPPORT }),
233
234
  ...(includeSessionControls && {
234
235
  ...(controlValues && { controlValues }),
235
236
  providerControls: ext.providerControls,
@@ -307,6 +308,7 @@ function buildCliSession(state: CliProviderState, options: SessionEntryBuildOpti
307
308
  ...(summaryMetadata && { summaryMetadata }),
308
309
  ...(includeSessionMetadata && {
309
310
  capabilities: state.mode === 'terminal' ? PTY_SESSION_CAPABILITIES : CLI_CHAT_SESSION_CAPABILITIES,
311
+ messageInput: state.messageInput || TEXT_ONLY_MESSAGE_INPUT_SUPPORT,
310
312
  }),
311
313
  ...(includeSessionControls && {
312
314
  ...(controlValues && { controlValues }),
@@ -341,7 +343,7 @@ function buildAcpSession(state: AcpProviderState, options: SessionEntryBuildOpti
341
343
  ...(git && { git }),
342
344
  activeChat,
343
345
  ...(summaryMetadata && { summaryMetadata }),
344
- ...(includeSessionMetadata && { capabilities: ACP_SESSION_CAPABILITIES }),
346
+ ...(includeSessionMetadata && { capabilities: ACP_SESSION_CAPABILITIES, messageInput: state.messageInput || TEXT_ONLY_MESSAGE_INPUT_SUPPORT }),
345
347
  ...(includeSessionControls && {
346
348
  ...(controlValues && { controlValues }),
347
349
  providerControls: state.providerControls,