cognitive-modules-cli 1.2.0 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,275 @@
1
+ /**
2
+ * Subagent - Orchestrate module calls with isolated execution contexts.
3
+ *
4
+ * Supports:
5
+ * - @call:module-name - Call another module
6
+ * - @call:module-name(args) - Call with arguments
7
+ * - context: fork - Isolated execution (no shared state)
8
+ * - context: main - Shared execution (default)
9
+ */
10
+
11
+ import type {
12
+ CognitiveModule,
13
+ ModuleResult,
14
+ ModuleInput,
15
+ Provider,
16
+ EnvelopeResponseV22
17
+ } from '../types.js';
18
+ import { loadModule, findModule, getDefaultSearchPaths } from './loader.js';
19
+ import { runModule } from './runner.js';
20
+
21
+ // =============================================================================
22
+ // Types
23
+ // =============================================================================
24
+
25
+ export interface SubagentContext {
26
+ parentId: string | null;
27
+ depth: number;
28
+ maxDepth: number;
29
+ results: Record<string, unknown>;
30
+ isolated: boolean;
31
+ }
32
+
33
+ export interface CallDirective {
34
+ module: string;
35
+ args: string;
36
+ match: string;
37
+ }
38
+
39
+ export interface SubagentRunOptions {
40
+ input?: ModuleInput;
41
+ validateInput?: boolean;
42
+ validateOutput?: boolean;
43
+ maxDepth?: number;
44
+ }
45
+
46
+ // =============================================================================
47
+ // Context Management
48
+ // =============================================================================
49
+
/**
 * Build the root context for a fresh orchestration run.
 *
 * The root has no parent, sits at depth 0, starts with an empty result map
 * and is not isolated.
 *
 * @param maxDepth - Deepest nesting level the run may reach (defaults to 5).
 * @returns A new context object; nothing is shared between calls.
 */
export function createContext(maxDepth: number = 5): SubagentContext {
  const root: SubagentContext = {
    parentId: null,
    depth: 0,
    maxDepth: maxDepth,
    results: {},
    isolated: false
  };
  return root;
}
62
+
/**
 * Derive an isolated child context: one level deeper than `ctx`, same depth
 * limit, and an empty result map (nothing is inherited from the parent).
 *
 * @param ctx - Context of the calling module.
 * @param moduleName - Name recorded as the child's `parentId`.
 * @returns A new context flagged as isolated.
 */
export function forkContext(ctx: SubagentContext, moduleName: string): SubagentContext {
  const { depth, maxDepth } = ctx;
  return {
    parentId: moduleName,
    depth: depth + 1,
    maxDepth,
    results: {},
    isolated: true
  };
}
75
+
/**
 * Derive a non-isolated child context: one level deeper than `ctx`, same
 * depth limit, and a shallow copy of the parent's results.
 *
 * The copy is a separate object, so entries the child adds later do not
 * appear in the parent's map.
 *
 * @param ctx - Context of the calling module.
 * @param moduleName - Name recorded as the child's `parentId`.
 * @returns A new context carrying the inherited results.
 */
export function extendContext(ctx: SubagentContext, moduleName: string): SubagentContext {
  const inherited: Record<string, unknown> = { ...ctx.results };
  const childDepth = ctx.depth + 1;
  return {
    parentId: moduleName,
    depth: childDepth,
    maxDepth: ctx.maxDepth,
    results: inherited,
    isolated: false
  };
}
88
+
89
+ // =============================================================================
90
+ // Call Parsing
91
+ // =============================================================================
92
+
// Matches `@call:module-name` with an optional `(args)` suffix.
// Group 1 is the module name, group 2 the raw text between the parentheses.
const CALL_PATTERN = /@call:([a-zA-Z0-9_-]+)(?:\(([^)]*)\))?/g;

/**
 * Collect every `@call` directive that appears in `text`, in order.
 *
 * @param text - Prompt text to scan.
 * @returns One entry per directive: the module name, the argument text
 *   (empty string when there are no parentheses or they are empty) and the
 *   exact matched substring.
 */
export function parseCalls(text: string): CallDirective[] {
  // The pattern is a shared global regex; start scanning from the beginning.
  CALL_PATTERN.lastIndex = 0;

  return Array.from(text.matchAll(CALL_PATTERN), (hit) => ({
    module: hit[1],
    args: hit[2] || '',
    match: hit[0]
  }));
}
116
+
/**
 * Replace `@call` directives in `text` with their rendered results.
 *
 * Each key of `callResults` is the literal directive text to look for. Its
 * value is rendered as pretty-printed JSON when it is an object (or null)
 * and with `String()` otherwise, then inserted as
 * `[Result from <directive>]:\n<rendered>`.
 *
 * Substitution is done in a single pass over the original text:
 * - every occurrence of a directive is replaced, not only the first;
 * - when two keys overlap (e.g. `@call:a` and `@call:a(x)`) the longer one
 *   wins, and text that was just inserted is never scanned again;
 * - result text is inserted verbatim, so `$&`, `$1`, `$$` and similar
 *   sequences inside a result are not treated as replacement patterns.
 *
 * Empty-string keys are ignored because they would match at every position.
 *
 * @param text - Prompt text containing directives.
 * @param callResults - Map from directive text to the value to inject.
 * @returns The text with all known directives substituted.
 */
export function substituteCallResults(
  text: string,
  callResults: Record<string, unknown>
): string {
  const rendered = new Map<string, string>();

  for (const [callStr, callResult] of Object.entries(callResults)) {
    if (callStr === '') {
      continue;
    }
    const resultStr = typeof callResult === 'object'
      ? JSON.stringify(callResult, null, 2)
      : String(callResult);
    rendered.set(callStr, `[Result from ${callStr}]:\n${resultStr}`);
  }

  if (rendered.size === 0) {
    return text;
  }

  // Longest keys first so the alternation prefers `@call:a(x)` over `@call:a`.
  const alternation = Array.from(rendered.keys())
    .sort((a, b) => b.length - a.length)
    .map((key) => key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
    .join('|');

  // A function replacer keeps `$` sequences in the result text literal.
  return text.replace(
    new RegExp(alternation, 'g'),
    (found) => rendered.get(found) ?? found
  );
}
136
+
137
+ // =============================================================================
138
+ // Orchestrator
139
+ // =============================================================================
140
+
/**
 * Runs modules whose prompts may contain `@call:` directives.
 *
 * Each directive is resolved by running the referenced module first; the
 * child results are then substituted into the prompt before the module
 * itself is executed through `runModule`.
 */
export class SubagentOrchestrator {
  private provider: Provider;
  // Names of modules whose `run` is currently in progress on this instance.
  private running: Set<string> = new Set();
  private cwd: string;

  /**
   * @param provider - Provider handed to `runModule` for every execution.
   * @param cwd - Directory used to compute module search paths.
   */
  constructor(provider: Provider, cwd: string = process.cwd()) {
    this.provider = provider;
    this.cwd = cwd;
  }

  /**
   * Run `moduleName`, resolving the `@call` directives in its prompt first.
   *
   * `options.validateInput` is accepted but not consulted here; child runs
   * are always started with `validateInput: false`.
   *
   * @param moduleName - Module to locate and execute.
   * @param options - Input and limits for this run.
   * @param context - Existing context; a root context is created when omitted.
   * @returns The result produced by `runModule` for this module.
   * @throws Error when the depth limit is exceeded, when the module is
   *   already running on this orchestrator, or when it cannot be found.
   */
  async run(
    moduleName: string,
    options: SubagentRunOptions = {},
    context?: SubagentContext
  ): Promise<ModuleResult> {
    const {
      input: parentInput = {},
      validateOutput: checkOutput = true,
      maxDepth: depthLimit = 5
    } = options;

    const ctx = context ?? createContext(depthLimit);

    // Guard: nesting limit.
    if (ctx.depth > ctx.maxDepth) {
      throw new Error(
        `Max subagent depth (${ctx.maxDepth}) exceeded. Check for circular calls.`
      );
    }

    // Guard: a module that is still running must not be entered again.
    if (this.running.has(moduleName)) {
      throw new Error(`Circular call detected: ${moduleName}`);
    }

    this.running.add(moduleName);

    try {
      const module = await findModule(moduleName, getDefaultSearchPaths(this.cwd));
      if (!module) {
        throw new Error(`Module not found: ${moduleName}`);
      }

      // A module declaring `context: fork` gives its children isolated contexts.
      const forkChildren = (module.context ?? 'main') === 'fork';

      const callResults: Record<string, unknown> = {};

      for (const directive of parseCalls(module.prompt)) {
        // Explicit args replace the parent's input; otherwise it is copied.
        const childInput: ModuleInput = directive.args
          ? { query: directive.args, code: directive.args }
          : { ...parentInput };

        const childContext = forkChildren
          ? forkContext(ctx, moduleName)
          : extendContext(ctx, moduleName);

        const childResult = await this.run(
          directive.module,
          {
            input: childInput,
            validateInput: false, // Skip validation for @call args
            validateOutput: checkOutput
          },
          childContext
        );

        // Keyed by the literal directive text so it can be substituted later.
        if (childResult.ok && 'data' in childResult) {
          callResults[directive.match] = childResult.data;
        } else if ('error' in childResult) {
          callResults[directive.match] = { error: childResult.error };
        }
      }

      // Only rewrite the prompt when at least one child produced something.
      const moduleToRun = Object.keys(callResults).length > 0
        ? {
            ...module,
            prompt: substituteCallResults(module.prompt, callResults) + '\n\n## Subagent Results Available\nThe @call results have been injected above. Use them in your response.\n'
          }
        : module;

      const result = await runModule(moduleToRun, this.provider, {
        input: parentInput,
        verbose: false,
        useV22: true
      });

      if (result.ok && 'data' in result) {
        ctx.results[moduleName] = result.data;
      }

      return result;
    } finally {
      // Always clear the in-progress marker, including on failure.
      this.running.delete(moduleName);
    }
  }
}
263
+
/**
 * One-shot helper: build a `SubagentOrchestrator` and run a single module.
 *
 * @param moduleName - Module to execute.
 * @param provider - Provider passed to the orchestrator.
 * @param options - Run options, plus an optional `cwd` for module lookup
 *   (defaults to `process.cwd()`).
 * @returns The result of the orchestrated run.
 */
export async function runWithSubagents(
  moduleName: string,
  provider: Provider,
  options: SubagentRunOptions & { cwd?: string } = {}
): Promise<ModuleResult> {
  const { cwd: requestedCwd, ...runOptions } = options;
  const workingDir = requestedCwd === undefined ? process.cwd() : requestedCwd;
  return new SubagentOrchestrator(provider, workingDir).run(moduleName, runOptions);
}
@@ -1,8 +1,17 @@
1
1
  /**
2
2
  * Base Provider - Abstract class for all LLM providers
3
+ * v2.5: Added streaming and multimodal support
3
4
  */
4
5
 
5
- import type { Provider, InvokeParams, InvokeResult } from '../types.js';
6
+ import type {
7
+ Provider,
8
+ InvokeParams,
9
+ InvokeResult,
10
+ ProviderV25,
11
+ InvokeParamsV25,
12
+ StreamingInvokeResult,
13
+ ModalityType
14
+ } from '../types.js';
6
15
 
7
16
  export abstract class BaseProvider implements Provider {
8
17
  abstract name: string;
@@ -27,3 +36,79 @@ export abstract class BaseProvider implements Provider {
27
36
  }
28
37
  }
29
38
  }
39
+
/**
 * Provider base class adding the v2.5 streaming and multimodal hooks.
 *
 * Every method here is a conservative default; subclasses override the ones
 * their backend actually supports.
 */
export abstract class BaseProviderV25 extends BaseProvider implements ProviderV25 {
  /**
   * Whether the provider can stream responses. Defaults to `false`.
   */
  supportsStreaming(): boolean {
    return false;
  }

  /**
   * Modalities accepted as input and produced as output.
   * Defaults to text only in both directions.
   */
  supportsMultimodal(): { input: ModalityType[]; output: ModalityType[] } {
    const input: ModalityType[] = ['text'];
    const output: ModalityType[] = ['text'];
    return { input, output };
  }

  /**
   * Streaming entry point. The default performs a regular `invoke` and
   * exposes the full response as a stream that yields exactly one chunk.
   *
   * @param params - Invocation parameters, forwarded to `invoke`.
   * @returns The single-chunk stream together with the usage from `invoke`.
   */
  async invokeStream(params: InvokeParamsV25): Promise<StreamingInvokeResult> {
    const completed = await this.invoke(params);

    const singleChunk = async function* (): AsyncIterable<string> {
      yield completed.content;
    };

    return {
      stream: singleChunk(),
      usage: completed.usage
    };
  }

  /**
   * Convert media inputs into provider-specific content parts.
   *
   * The default handles images only and emits `image_url` parts: URL images
   * are passed through, base64 images become `data:` URLs. Entries that match
   * neither form are dropped. Audio and video are ignored.
   *
   * @param images - Image inputs, if any.
   * @param _audio - Unused by the default implementation.
   * @param _video - Unused by the default implementation.
   * @returns The formatted parts; empty when there are no usable images.
   */
  protected formatMediaForProvider(
    images?: Array<{ type: string; url?: string; data?: string; media_type?: string }>,
    _audio?: Array<{ type: string; url?: string; data?: string; media_type?: string }>,
    _video?: Array<{ type: string; url?: string; data?: string; media_type?: string }>
  ): unknown[] {
    const parts: unknown[] = [];

    if (!images) {
      return parts;
    }

    for (const img of images) {
      if (img.type === 'url' && img.url) {
        parts.push({
          type: 'image_url',
          image_url: { url: img.url }
        });
      } else if (img.type === 'base64' && img.data && img.media_type) {
        parts.push({
          type: 'image_url',
          image_url: { url: `data:${img.media_type};base64,${img.data}` }
        });
      }
    }

    return parts;
  }
}
@@ -1,17 +1,35 @@
1
1
  /**
2
2
  * OpenAI Provider - OpenAI API (and compatible APIs)
3
+ * v2.5: Added streaming and multimodal (vision) support
3
4
  */
4
5
 
5
- import { BaseProvider } from './base.js';
6
- import type { InvokeParams, InvokeResult } from '../types.js';
6
+ import { BaseProviderV25 } from './base.js';
7
+ import type {
8
+ InvokeParams,
9
+ InvokeResult,
10
+ InvokeParamsV25,
11
+ StreamingInvokeResult,
12
+ ModalityType,
13
+ MediaInput
14
+ } from '../types.js';
7
15
 
8
- export class OpenAIProvider extends BaseProvider {
16
+ // Type for OpenAI message content
17
+ type OpenAIContentPart =
18
+ | { type: 'text'; text: string }
19
+ | { type: 'image_url'; image_url: { url: string; detail?: 'low' | 'high' | 'auto' } };
20
+
21
+ type OpenAIMessage = {
22
+ role: 'system' | 'user' | 'assistant';
23
+ content: string | OpenAIContentPart[];
24
+ };
25
+
26
+ export class OpenAIProvider extends BaseProviderV25 {
9
27
  name = 'openai';
10
28
  private apiKey: string;
11
29
  private model: string;
12
30
  private baseUrl: string;
13
31
 
14
- constructor(apiKey?: string, model = 'gpt-5.2', baseUrl = 'https://api.openai.com/v1') {
32
+ constructor(apiKey?: string, model = 'gpt-4o', baseUrl = 'https://api.openai.com/v1') {
15
33
  super();
16
34
  this.apiKey = apiKey || process.env.OPENAI_API_KEY || '';
17
35
  this.model = model;
@@ -22,6 +40,27 @@ export class OpenAIProvider extends BaseProvider {
22
40
  return !!this.apiKey;
23
41
  }
24
42
 
/**
 * Streaming capability flag. This provider implements `invokeStream`
 * itself, so it reports `true` unconditionally.
 */
supportsStreaming(): boolean {
  return true;
}
49
+
/**
 * Report supported modalities for the configured model.
 *
 * Image input is advertised when the model name contains one of the known
 * vision model identifiers; output is always text.
 */
supportsMultimodal(): { input: ModalityType[]; output: ModalityType[] } {
  // Substring match, so dated or suffixed variants of these names also count.
  const visionFamilies = ['gpt-4o', 'gpt-4-vision', 'gpt-4-turbo', 'gpt-4o-mini'];

  let acceptsImages = false;
  for (const family of visionFamilies) {
    if (this.model.includes(family)) {
      acceptsImages = true;
      break;
    }
  }

  const input: ModalityType[] = acceptsImages ? ['text', 'image'] : ['text'];

  return {
    input,
    output: ['text'] // DALL-E would be separate
  };
}
63
+
25
64
  async invoke(params: InvokeParams): Promise<InvokeResult> {
26
65
  if (!this.isConfigured()) {
27
66
  throw new Error('OpenAI API key not configured. Set OPENAI_API_KEY environment variable.');
@@ -81,4 +120,187 @@ export class OpenAIProvider extends BaseProvider {
81
120
  } : undefined,
82
121
  };
83
122
  }
123
+
/**
 * Call the chat completions endpoint with `stream: true` and expose the
 * response as an async stream of content deltas.
 *
 * The returned `usage` is a getter: it is `undefined` until a streamed chunk
 * carrying a `usage` object has been parsed, so read it after the stream has
 * been consumed.
 * NOTE(review): this request does not set `stream_options.include_usage`;
 * per the OpenAI API reference usage is only streamed when that option is
 * requested. Confirm whether it should be enabled for this provider.
 *
 * @param params - Messages, sampling settings and optional images / JSON schema.
 * @returns The content stream plus lazily populated token usage.
 * @throws Error when no API key is configured, when the API responds with a
 *   non-2xx status, or when the response has no body.
 */
async invokeStream(params: InvokeParamsV25): Promise<StreamingInvokeResult> {
  if (!this.isConfigured()) {
    throw new Error('OpenAI API key not configured. Set OPENAI_API_KEY environment variable.');
  }

  const url = `${this.baseUrl}/chat/completions`;

  // Build messages with multimodal content if present
  const messages = this.buildMessagesWithMedia(params);

  const body: Record<string, unknown> = {
    model: this.model,
    messages,
    temperature: params.temperature ?? 0.7,
    max_tokens: params.maxTokens ?? 4096,
    stream: true,
  };

  // Add JSON mode if schema provided
  if (params.jsonSchema) {
    body.response_format = { type: 'json_object' };
  }

  const response = await fetch(url, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${this.apiKey}`,
    },
    body: JSON.stringify(body),
  });

  if (!response.ok) {
    const error = await response.text();
    throw new Error(`OpenAI API error: ${response.status} - ${error}`);
  }

  const bodyReader = response.body?.getReader();
  if (!bodyReader) {
    throw new Error('No response body');
  }

  type StreamChunk = {
    choices?: Array<{ delta?: { content?: string } }>;
    usage?: { prompt_tokens?: number; completion_tokens?: number; total_tokens?: number };
  };

  const decoder = new TextDecoder();
  let usage: { promptTokens: number; completionTokens: number; totalTokens: number } | undefined;

  // Re-bind so the narrowed (non-undefined) reader type is visible in the generator.
  const reader = bodyReader;

  async function* streamGenerator(): AsyncIterable<string> {
    let buffer = '';

    try {
      while (true) {
        const { done, value } = await reader.read();

        // On the final read, flush bytes the decoder is still holding.
        buffer += done ? decoder.decode() : decoder.decode(value, { stream: true });

        // Parse SSE events line by line. While the stream is open the last
        // element may be an incomplete line and is kept for the next read;
        // once the stream has ended everything left is processed.
        const lines = buffer.split('\n');
        buffer = done ? '' : (lines.pop() ?? '');

        for (const rawLine of lines) {
          // trim() also removes the '\r' of CRLF-terminated events.
          const line = rawLine.trim();
          if (!line.startsWith('data:')) {
            continue;
          }

          const data = line.slice(5).trim();
          if (data === '[DONE]') {
            return;
          }

          // Parse outside of the yield so that only JSON errors are skipped.
          let parsed: StreamChunk;
          try {
            const decoded: unknown = JSON.parse(data);
            if (decoded === null || typeof decoded !== 'object') {
              continue;
            }
            parsed = decoded as StreamChunk;
          } catch {
            // Skip malformed JSON
            continue;
          }

          const content = parsed.choices?.[0]?.delta?.content;
          if (content) {
            yield content;
          }

          // Capture usage if available
          if (parsed.usage) {
            usage = {
              promptTokens: parsed.usage.prompt_tokens ?? 0,
              completionTokens: parsed.usage.completion_tokens ?? 0,
              totalTokens: parsed.usage.total_tokens ?? 0,
            };
          }
        }

        if (done) {
          return;
        }
      }
    } finally {
      // Runs on normal completion, on [DONE], on errors and when the consumer
      // stops iterating early; releases the underlying HTTP response body.
      await reader.cancel().catch(() => undefined);
    }
  }

  return {
    stream: streamGenerator(),
    // Getter: the generator fills `usage` only while the stream is consumed.
    get usage() {
      return usage;
    }
  };
}
230
+
/**
 * Convert the request messages into OpenAI chat messages, attaching images
 * and the JSON-schema instruction to the last user message.
 *
 * - With neither images nor a JSON schema, the caller's messages are
 *   returned as-is.
 * - Otherwise new message objects are built (the caller's array and objects
 *   are not modified). The last user message gets the JSON instruction
 *   appended to its text and, when images are present, becomes a multi-part
 *   message: one text part followed by one `image_url` part per image that
 *   `mediaInputToUrl` can convert.
 * - When there is no user message, nothing is attached or appended.
 *
 * The JSON instruction is added for text-only requests too, because
 * `invokeStream` enables `json_object` mode whenever a schema is present.
 * NOTE(review): confirm no caller already appends this instruction to the
 * messages before invoking the streaming path.
 *
 * @param params - Request parameters (messages, optional images and schema).
 * @returns Messages ready to be sent to the chat completions endpoint.
 */
private buildMessagesWithMedia(params: InvokeParamsV25): OpenAIMessage[] {
  const images = params.images ?? [];
  const hasImages = images.length > 0;

  if (!hasImages && !params.jsonSchema) {
    return params.messages;
  }

  // Locate the last user message (manual scan; -1 when there is none).
  let lastUserIdx = -1;
  for (let i = params.messages.length - 1; i >= 0; i--) {
    if (params.messages[i]?.role === 'user') {
      lastUserIdx = i;
      break;
    }
  }

  const jsonSuffix = params.jsonSchema && lastUserIdx >= 0
    ? this.buildJsonPrompt(params.jsonSchema)
    : '';

  return params.messages.map((msg, i): OpenAIMessage => {
    if (i !== lastUserIdx) {
      return { role: msg.role, content: msg.content };
    }

    const text = msg.content + jsonSuffix;

    if (!hasImages) {
      return { role: msg.role, content: text };
    }

    // Convert to multimodal content: text first, then the images.
    const content: OpenAIContentPart[] = [{ type: 'text', text }];
    for (const img of images) {
      const imageUrl = this.mediaInputToUrl(img);
      if (imageUrl) {
        content.push({
          type: 'image_url',
          image_url: { url: imageUrl, detail: 'auto' }
        });
      }
    }

    return { role: msg.role, content };
  });
}
287
+
/**
 * Turn a media input into a URL string usable in an `image_url` part.
 *
 * URL inputs are returned unchanged and base64 inputs become `data:` URLs.
 * File inputs are not read here: a warning is logged and `null` is returned.
 * Any other input type also yields `null`.
 *
 * @param media - The media item to convert.
 * @returns The URL, or `null` when the item cannot be converted.
 */
private mediaInputToUrl(media: MediaInput): string | null {
  if (media.type === 'url') {
    return media.url;
  }

  if (media.type === 'base64') {
    return `data:${media.media_type};base64,${media.data}`;
  }

  if (media.type === 'file') {
    // File paths would need to be loaded first
    // This should be handled by the runner before calling the provider
    console.warn('[cognitive] File media input not pre-loaded, skipping');
  }

  return null;
}
84
306
  }