agent-state-machine 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/lib/llm.js ADDED
@@ -0,0 +1,472 @@
+ /**
+  * File: /lib/llm.js
+  */
+
+ import fs from 'fs';
+ import path from 'path';
+ import { spawn, execSync } from 'child_process';
+ import { createRequire } from 'module';
+
+ const require = createRequire(import.meta.url);
+
+ /**
+  * LLM Helper Module
+  *
+  * Supports both CLI tools (claude, gemini, codex) and APIs (anthropic, openai)
+  *
+  * Usage:
+  *   import { llm } from 'agent-state-machine';
+  *   const response = await llm(context, { model: 'smart', prompt: 'Hello' });
+  */
+
+ /**
+  * Detect available CLI tools
+  * Note: `which` is Unix-only; on Windows this will typically report false for everything.
+  */
+ export function detectAvailableCLIs() {
+   const clis = ['claude', 'gemini', 'codex', 'ollama'];
+   const available = {};
+
+   for (const cli of clis) {
+     try {
+       execSync(`which ${cli}`, { stdio: 'ignore' });
+       available[cli] = true;
+     } catch {
+       available[cli] = false;
+     }
+   }
+
+   return available;
+ }
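+
+ // Illustrative only (not asserted by the original file): on a machine with
+ // just the Claude CLI on PATH, detectAvailableCLIs() returns something like:
+ //   { claude: true, gemini: false, codex: false, ollama: false }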
+
+ /**
+  * Write the generated prompt file
+  */
+ export function writeGeneratedPrompt(workflowDir, content) {
+   const promptDir = path.join(workflowDir, 'state');
+   const promptFile = path.join(promptDir, 'generated-prompt.md');
+
+   if (!fs.existsSync(promptDir)) {
+     fs.mkdirSync(promptDir, { recursive: true });
+   }
+
+   fs.writeFileSync(promptFile, content);
+   return promptFile;
+ }
+
+ /**
+  * Build the full prompt with steering and context
+  */
+ export function buildPrompt(context, options) {
+   const parts = [];
+
+   // Add context summary if requested
+   if (options.includeContext !== false) {
+     const cleanContext = { ...context };
+     delete cleanContext._steering;
+     delete cleanContext._loop;
+     delete cleanContext._config;
+
+     if (Object.keys(cleanContext).length > 0) {
+       parts.push('# Current Context\n');
+       parts.push('```json\n');
+       parts.push(JSON.stringify(cleanContext, null, 2));
+       parts.push('\n```\n\n---\n');
+     }
+   }
+
+   // Add interaction format instruction
+   parts.push('# Interaction Format\n');
+   parts.push('IF YOU NEED TO ASK THE USER A QUESTION OR REQUEST INPUT, RESPOND WITH EXACTLY:\n');
+   parts.push('{ "interact": "your question here" }\n\n');
+   parts.push('Only use this format when you genuinely need user input to proceed.\n\n---\n');
+
+   // Add global steering if available
+   if (context._steering?.global) {
+     parts.push('# System Instructions\n');
+     parts.push(context._steering.global);
+     parts.push('\n---\n');
+   }
+
+   // Add the actual prompt
+   parts.push('# Task\n\n');
+   parts.push(options.prompt);
+
+   return parts.join('\n');
+ }
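+
+ // Sketch of the assembled prompt (section order as built above; exact
+ // spacing comes from the '\n' joins):
+ //   # Current Context      json dump of context minus _steering/_loop/_config
+ //   # Interaction Format   the { "interact": ... } escape hatch
+ //   # System Instructions  global steering, when present
+ //   # Task                 options.prompt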
+
+ /**
+  * Execute CLI command and return response
+  */
+ async function executeCLI(command, promptFile, options = {}, apiKeys = {}) {
+   return new Promise((resolve, reject) => {
+     // Parse command to extract base command and args
+     // Note: naive split; if you need quoted args, consider a shell-args parser.
+     const parts = command.split(' ');
+     const baseCmd = parts[0];
+     const baseArgs = parts.slice(1);
+
+     // Build full args
+     const args = [...baseArgs];
+
+     const ensureCodexExec = () => {
+       const CODEX_SUBCOMMANDS = new Set([
+         'exec', 'e',
+         'review',
+         'login', 'logout',
+         'mcp', 'mcp-server',
+         'app-server',
+         'completion',
+         'sandbox', 'debug',
+         'apply', 'a',
+         'resume',
+         'cloud',
+         'features',
+         'help'
+       ]);
+
+       const optionsWithValues = new Set([
+         '-c', '--config',
+         '--enable', '--disable',
+         '-i', '--image',
+         '-m', '--model',
+         '-p', '--profile',
+         '-s', '--sandbox',
+         '-a', '--ask-for-approval',
+         '-C', '--cd',
+         '--local-provider',
+         '--add-dir',
+         '--color',
+         '--output-schema',
+         '-o', '--output-last-message'
+       ]);
+
+       // Insert `exec` after any leading global options so codex doesn't start interactive mode.
+       let i = 0;
+       while (i < args.length) {
+         const token = args[i];
+         if (!token.startsWith('-')) break;
+
+         if (optionsWithValues.has(token)) {
+           i += 2;
+           continue;
+         }
+
+         i += 1;
+       }
+
+       const firstNonOption = args[i];
+       if (firstNonOption && CODEX_SUBCOMMANDS.has(firstNonOption)) {
+         return;
+       }
+
+       args.splice(i, 0, 'exec');
+     };
+
+     // Different CLIs handle file input differently
+     if (baseCmd === 'claude') {
+       // Claude CLI: use stdin for prompt input
+       args.push('--print'); // Print response only
+       args.push('--permission-mode', 'acceptEdits');
+       // File content will be piped via stdin (no additional args needed)
+     } else if (baseCmd === 'gemini') {
+       // Gemini CLI
+       args.push('--approval-mode', 'auto_edit');
+       // No specific args needed for stdin input + one-shot mode
+     } else if (baseCmd === 'codex') {
+       // Codex CLI defaults to an interactive TUI, which requires a TTY.
+       // Force non-interactive mode via `codex exec`, and feed the prompt via stdin ("-").
+       ensureCodexExec();
+
+       // Write only the final message to a file to avoid parsing extra output.
+       const lastMessageFile = path.join(
+         path.dirname(promptFile),
+         `codex-last-message-${process.pid}-${Date.now()}.txt`
+       );
+       args.push('--output-last-message', lastMessageFile);
+
+       args.push('-');
+     } else {
+       // Generic: try passing file as argument
+       args.push(promptFile);
+     }
+
+     console.log(` [LLM] Running: ${baseCmd} ${args.join(' ')}`);
+
+     // Prepare environment variables with API keys if provided
+     const env = { ...process.env };
+     if (apiKeys.gemini) env.GEMINI_API_KEY = apiKeys.gemini;
+     if (apiKeys.anthropic) env.ANTHROPIC_API_KEY = apiKeys.anthropic;
+     if (apiKeys.openai) env.OPENAI_API_KEY = apiKeys.openai;
+
+     const child = spawn(baseCmd, args, {
+       stdio: ['pipe', 'pipe', 'pipe'],
+       env: env
+     });
+
+     // Feed stdin for Codex, Claude, and Gemini from the prompt file; otherwise close stdin.
+     if (baseCmd === 'codex' || baseCmd === 'claude' || baseCmd === 'gemini') {
+       fs.createReadStream(promptFile).pipe(child.stdin);
+     } else {
+       child.stdin.end();
+     }
+
+     let stdout = '';
+     let stderr = '';
+
+     child.stdout.on('data', (data) => {
+       stdout += data.toString();
+     });
+
+     child.stderr.on('data', (data) => {
+       stderr += data.toString();
+     });
+
+     child.on('close', (code) => {
+       if (code === 0) {
+         if (baseCmd === 'codex') {
+           const outputFlagIndex = args.findIndex(a => a === '--output-last-message' || a === '-o');
+           const outputFile = outputFlagIndex >= 0 ? args[outputFlagIndex + 1] : null;
+           if (outputFile && fs.existsSync(outputFile)) {
+             try {
+               stdout = fs.readFileSync(outputFile, 'utf-8');
+             } finally {
+               try { fs.unlinkSync(outputFile); } catch {}
+             }
+           }
+         }
+
+         resolve({
+           text: stdout.trim(),
+           model: command,
+           provider: 'cli',
+           usage: null
+         });
+       } else {
+         reject(new Error(`CLI command failed (exit ${code}): ${stderr || stdout}`));
+       }
+     });
+
+     child.on('error', (err) => {
+       reject(new Error(`Failed to execute CLI: ${err.message}`));
+     });
+   });
+ }
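+
+ // Worked example (the model name is a placeholder, not a recommendation): a
+ // models entry of 'codex -m some-model' ends up spawned as
+ //   codex -m some-model exec --output-last-message <state>/codex-last-message-<pid>-<ts>.txt -
+ // ensureCodexExec() skips the leading `-m some-model` pair (an option that
+ // takes a value) and splices `exec` in before the first non-option token.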
+
+ /**
+  * Execute API call and return response
+  */
+ async function executeAPI(provider, model, prompt, apiKey, options = {}) {
+   console.log(` [LLM] Calling API: ${provider}/${model}`);
+
+   if (provider === 'anthropic') {
+     // Lazy require (via createRequire) to avoid loading the package if not used
+     let Anthropic;
+     try {
+       Anthropic = require('@anthropic-ai/sdk');
+     } catch {
+       throw new Error('Anthropic SDK not installed. Run: npm install @anthropic-ai/sdk');
+     }
+
+     const client = new Anthropic({ apiKey });
+
+     const response = await client.messages.create({
+       model: model,
+       max_tokens: options.maxTokens || 4096,
+       messages: [{ role: 'user', content: prompt }]
+     });
+
+     return {
+       text: response.content[0].text,
+       model: model,
+       provider: 'anthropic',
+       usage: {
+         inputTokens: response.usage.input_tokens,
+         outputTokens: response.usage.output_tokens
+       }
+     };
+   }
+
+   if (provider === 'openai') {
+     let OpenAI;
+     try {
+       OpenAI = require('openai');
+     } catch {
+       throw new Error('OpenAI SDK not installed. Run: npm install openai');
+     }
+
+     const client = new OpenAI({ apiKey });
+
+     const response = await client.chat.completions.create({
+       model: model,
+       max_tokens: options.maxTokens || 4096,
+       messages: [{ role: 'user', content: prompt }]
+     });
+
+     return {
+       text: response.choices[0].message.content,
+       model: model,
+       provider: 'openai',
+       usage: {
+         inputTokens: response.usage.prompt_tokens,
+         outputTokens: response.usage.completion_tokens
+       }
+     };
+   }
+
+   throw new Error(`Unknown API provider: ${provider}`);
+ }
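+
+ // Both branches normalize to the same response shape (values illustrative):
+ //   { text: '...', model: 'gpt-4o-mini', provider: 'openai',
+ //     usage: { inputTokens: 120, outputTokens: 340 } }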
+
+ /**
+  * Main LLM function
+  *
+  * @param {object} context - The workflow context (contains _config, _steering, etc.)
+  * @param {object} options - Options for the LLM call
+  * @param {string} options.model - Model key from workflow.js models config
+  * @param {string} options.prompt - The prompt to send
+  * @param {boolean} options.includeContext - Whether to include context in prompt (default: true)
+  * @param {number} options.maxTokens - Max tokens for API calls (default: 4096)
+  * @param {string} options.workflowDir - Workflow directory (usually from context)
+  *
+  * @returns {Promise<{text: string, model: string, provider: string, usage: object|null}>}
+  */
+ export async function llm(context, options) {
+   if (!options.prompt) {
+     throw new Error('llm() requires a prompt');
+   }
+
+   if (!options.model) {
+     throw new Error('llm() requires a model key');
+   }
+
+   const config = context._config || {};
+   const models = config.models || {};
+   const apiKeys = config.apiKeys || {};
+   const workflowDir = config.workflowDir || process.cwd();
+
+   // Look up the model command/config
+   const modelConfig = models[options.model];
+
+   if (!modelConfig) {
+     const available = Object.keys(models).join(', ');
+     throw new Error(
+       `Unknown model key: "${options.model}". Available models: ${available || 'none defined'}`
+     );
+   }
+
+   // Build the full prompt
+   const fullPrompt = buildPrompt(context, options);
+
+   // Write to generated-prompt.md
+   const promptFile = writeGeneratedPrompt(workflowDir, fullPrompt);
+   console.log(` [LLM] Prompt written to: ${promptFile}`);
+
+   // Check whether it's an API call or a CLI command
+   if (modelConfig.startsWith('api:')) {
+     // Format: api:provider:model
+     const parts = modelConfig.split(':');
+     const provider = parts[1];
+     const model = parts.slice(2).join(':');
+
+     const apiKey = apiKeys[provider] || process.env[`${provider.toUpperCase()}_API_KEY`];
+
+     if (!apiKey) {
+       throw new Error(
+         `No API key found for ${provider}. Set it in workflow.js apiKeys or the ${provider.toUpperCase()}_API_KEY env var`
+       );
+     }
+
+     return executeAPI(provider, model, fullPrompt, apiKey, options);
+   }
+
+   // CLI execution
+   return executeCLI(modelConfig, promptFile, options, apiKeys);
+ }
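+
+ // Sketch of a models map this lookup resolves against (keys and values are
+ // illustrative, not prescriptive): plain strings are treated as CLI commands,
+ // 'api:provider:model' strings go through executeAPI().
+ //   models: {
+ //     smart: 'claude --model opus',
+ //     fast: 'api:openai:gpt-4o-mini'
+ //   }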
+
+ /**
+  * Simple wrapper that just returns the text
+  */
+ export async function llmText(context, options) {
+   const response = await llm(context, options);
+   return response.text;
+ }
+
+ /**
+  * Parse interaction request from LLM response
+  * Detects { "interact": "question" } pattern in various formats
+  * @param {string} text - The LLM response text
+  * @returns {{ isInteraction: boolean, question?: string }}
+  */
+ export function parseInteractionRequest(text) {
+   if (!text || typeof text !== 'string') {
+     return { isInteraction: false };
+   }
+
+   // Match { "interact": "..." } with various formatting
+   // Supports: quoted/unquoted key, with/without spaces
+   const pattern = /\{\s*"?interact"?\s*:\s*"((?:[^"\\]|\\.)*)"\s*\}/;
+   const match = text.match(pattern);
+
+   if (match && match[1]) {
+     // Unescape \n, \" and \\ in a single pass so an escaped backslash is
+     // never re-interpreted (e.g. "\\n" stays a literal backslash + n).
+     const question = match[1]
+       .replace(/\\(["\\n])/g, (_, c) => (c === 'n' ? '\n' : c))
+       .trim();
+
+     if (question.length > 0) {
+       return { isInteraction: true, question };
+     }
+   }
+
+   return { isInteraction: false };
+ }
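+
+ // Expected behavior, illustrated (hedged example inputs):
+ //   parseInteractionRequest('Sure! { "interact": "Which branch should I target?" }')
+ //     // → { isInteraction: true, question: 'Which branch should I target?' }
+ //   parseInteractionRequest('All done.')
+ //     // → { isInteraction: false }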
+
+ /**
+  * Parse JSON from LLM response (handles markdown code blocks)
+  */
+ export function parseJSON(text) {
+   // Try direct parse first
+   try {
+     return JSON.parse(text);
+   } catch {}
+
+   // Try extracting from markdown code block
+   const jsonMatch = text.match(/```(?:json)?\s*([\s\S]*?)```/);
+   if (jsonMatch) {
+     try {
+       return JSON.parse(jsonMatch[1].trim());
+     } catch {}
+   }
+
+   // Try finding JSON object/array in text
+   const objectMatch = text.match(/\{[\s\S]*\}/);
+   if (objectMatch) {
+     try {
+       return JSON.parse(objectMatch[0]);
+     } catch {}
+   }
+
+   const arrayMatch = text.match(/\[[\s\S]*\]/);
+   if (arrayMatch) {
+     try {
+       return JSON.parse(arrayMatch[0]);
+     } catch {}
+   }
+
+   throw new Error('Could not parse JSON from LLM response');
+ }
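+
+ // Fallback order, illustrated (example strings are assumptions):
+ //   parseJSON('{"ok":true}')                        // direct parse
+ //   parseJSON('```json\n{"ok":true}\n```')          // fenced code block
+ //   parseJSON('Result: {"ok":true} as requested')   // bare {...} span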
+
+ /**
+  * LLM call that expects a JSON response
+  */
+ export async function llmJSON(context, options) {
+   const response = await llm(context, {
+     ...options,
+     prompt: options.prompt + '\n\nRespond with valid JSON only, no other text.'
+   });
+
+   return {
+     ...response,
+     data: parseJSON(response.text)
+   };
+ }
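+
+ // Usage sketch (assumes a 'smart' key exists in the workflow's models config):
+ //   const { data } = await llmJSON(context, {
+ //     model: 'smart',
+ //     prompt: 'Summarize the repo as {"name": string, "language": string}'
+ //   });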