agent-state-machine 2.1.2 → 2.1.4

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
package/lib/llm.js CHANGED
@@ -39,6 +39,81 @@ export function detectAvailableCLIs() {
39
39
  return available;
40
40
  }
41
41
 
42
+ /**
43
+ * Get response format instructions based on response type
44
+ * Used by buildPrompt to inject appropriate interaction format instructions
45
+ */
46
+ function getResponseFormatInstructions(responseType) {
47
+ if (responseType === 'choice') {
48
+ return `# Response Format
49
+
50
+ When you need user input, respond with a structured choice:
51
+
52
+ {
53
+ "interact": {
54
+ "type": "choice",
55
+ "slug": "unique-slug",
56
+ "prompt": "Your question here?",
57
+ "options": [
58
+ { "key": "key1", "label": "Display Label", "description": "Help text" }
59
+ ],
60
+ "multiSelect": false,
61
+ "allowCustom": true
62
+ }
63
+ }
64
+
65
+ Rules:
66
+ - slug: unique identifier (e.g., "scope-platform")
67
+ - options: 2-5 choices with key, label, and optional description
68
+ - multiSelect: true allows selecting multiple options
69
+ - allowCustom: true shows "Other" for free-text input
70
+ - Ask ONE question at a time
71
+ `;
72
+ }
73
+
74
+ if (responseType === 'confirm') {
75
+ return `# Response Format
76
+
77
+ When you need user confirmation, respond with:
78
+
79
+ {
80
+ "interact": {
81
+ "type": "confirm",
82
+ "slug": "unique-slug",
83
+ "prompt": "Are you sure about X?",
84
+ "confirmLabel": "Yes, proceed",
85
+ "cancelLabel": "No, cancel"
86
+ }
87
+ }
88
+ `;
89
+ }
90
+
91
+ if (responseType === 'text') {
92
+ return `# Response Format
93
+
94
+ When you need text input, respond with:
95
+
96
+ {
97
+ "interact": {
98
+ "type": "text",
99
+ "slug": "unique-slug",
100
+ "prompt": "Please describe X:",
101
+ "placeholder": "Enter details...",
102
+ "validation": { "minLength": 10 }
103
+ }
104
+ }
105
+ `;
106
+ }
107
+
108
+ // Default: basic interact format
109
+ return `# Interaction Format
110
+ IF YOU NEED TO ASK THE USER A QUESTION OR REQUEST INPUT, RESPOND WITH EXACTLY:
111
+ { "interact": "your question here" }
112
+
113
+ Only use this format when you genuinely need user input to proceed.
114
+ `;
115
+ }
116
+
42
117
  /**
43
118
  * Build the full prompt with steering and context
44
119
  */
@@ -65,11 +140,9 @@ export function buildPrompt(context, options) {
65
140
  }
66
141
  }
67
142
 
68
- // Add interaction format instruction
69
- parts.push('# Interaction Format\n');
70
- parts.push('IF YOU NEED TO ASK THE USER A QUESTION OR REQUEST INPUT, RESPOND WITH EXACTLY:\n');
71
- parts.push('{ "interact": "your question here" }\n\n');
72
- parts.push('Only use this format when you genuinely need user input to proceed.\n\n---\n');
143
+ // Add response format instructions (based on responseType option)
144
+ parts.push(getResponseFormatInstructions(options.responseType));
145
+ parts.push('\n---\n');
73
146
 
74
147
  // Add global steering if available (always first)
75
148
  if (context._steering?.global) {
@@ -193,7 +193,7 @@ export class RemoteClient {
193
193
 
194
194
  await this.send({
195
195
  ...event,
196
- type: 'event', // Must come after spread to not be overwritten by event.type
196
+ _action: 'event', // Use _action for message routing to preserve event.type (interaction type)
197
197
  sessionToken: this.sessionToken,
198
198
  });
199
199
  }
@@ -260,7 +260,8 @@ async function executeMDAgent(runtime, agentPath, name, params, options = {}) {
260
260
  const fullPrompt = buildPrompt(context, {
261
261
  model,
262
262
  prompt: interpolatedPrompt,
263
- includeContext: config.includeContext !== 'false'
263
+ includeContext: config.includeContext !== 'false',
264
+ responseType: config.response
264
265
  });
265
266
 
266
267
  await logAgentStart(runtime, name, fullPrompt);
@@ -270,7 +271,8 @@ async function executeMDAgent(runtime, agentPath, name, params, options = {}) {
270
271
  response = await llm(context, {
271
272
  model: model,
272
273
  prompt: interpolatedPrompt,
273
- includeContext: config.includeContext !== 'false'
274
+ includeContext: config.includeContext !== 'false',
275
+ responseType: config.response
274
276
  });
275
277
 
276
278
  // Parse output based on format
@@ -297,17 +299,26 @@ async function executeMDAgent(runtime, agentPath, name, params, options = {}) {
297
299
  }
298
300
 
299
301
  // Check for interaction request
300
- const explicitInteraction =
301
- config.format === 'interaction' ||
302
- config.interaction === 'true' ||
303
- (typeof config.interaction === 'string' && config.interaction.length > 0);
304
-
305
302
  const parsedInteraction = parseInteractionRequest(response.text);
306
303
  const structuredInteraction =
307
304
  config.autoInteract !== 'false' && parsedInteraction.isInteraction;
308
305
 
306
+ // Check if agent returned an 'interact' object in its JSON response
307
+ const hasInteractKey = output && typeof output === 'object' && output.interact;
308
+
309
+ // Explicit interaction mode (format: interaction OR interaction: true)
310
+ // But only trigger if agent actually wants to interact (has interact key or parsed interaction)
311
+ const explicitInteraction =
312
+ config.format === 'interaction' ||
313
+ ((config.interaction === 'true' || (typeof config.interaction === 'string' && config.interaction.length > 0)) &&
314
+ (hasInteractKey || structuredInteraction));
315
+
309
316
  if (explicitInteraction || structuredInteraction) {
317
+ // Use interact object if present, otherwise fall back to parsed/raw
318
+ const interactionData = hasInteractKey ? output.interact : (structuredInteraction ? parsedInteraction : null);
319
+
310
320
  const slugRaw =
321
+ interactionData?.slug ||
311
322
  (typeof config.interaction === 'string' && config.interaction !== 'true'
312
323
  ? config.interaction
313
324
  : null) ||
@@ -317,13 +328,19 @@ async function executeMDAgent(runtime, agentPath, name, params, options = {}) {
317
328
 
318
329
  const slug = sanitizeSlug(slugRaw);
319
330
  const targetKey = config.interactionKey || outputKey || slug;
320
- const interactionContent = structuredInteraction ? parsedInteraction.question : response.text;
321
-
322
- const userResponse = await handleInteraction(runtime, {
331
+
332
+ // Build interaction object with full metadata
333
+ const interactionObj = hasInteractKey ? {
334
+ ...output.interact,
335
+ slug,
336
+ targetKey
337
+ } : {
323
338
  slug,
324
339
  targetKey,
325
- content: interactionContent
326
- }, name);
340
+ content: structuredInteraction ? parsedInteraction.question : response.text
341
+ };
342
+
343
+ const userResponse = await handleInteraction(runtime, interactionObj, name);
327
344
 
328
345
  // Return the user's response as the agent result
329
346
  if (outputKey) {
@@ -479,12 +496,12 @@ ${content}
479
496
  event: 'INTERACTION_REQUESTED',
480
497
  slug,
481
498
  targetKey,
482
- question: prompt || content,
483
499
  type: interaction.type || 'text',
484
- prompt,
500
+ prompt: prompt || content,
485
501
  options: interaction.options,
486
502
  allowCustom: interaction.allowCustom,
487
503
  multiSelect: interaction.multiSelect,
504
+ placeholder: interaction.placeholder,
488
505
  validation: interaction.validation,
489
506
  confirmLabel: interaction.confirmLabel,
490
507
  cancelLabel: interaction.cancelLabel,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agent-state-machine",
3
- "version": "2.1.2",
3
+ "version": "2.1.4",
4
4
  "type": "module",
5
5
  "description": "A workflow orchestrator for running agents and scripts in sequence with state management",
6
6
  "main": "lib/index.js",
@@ -2,6 +2,7 @@
2
2
  model: med
3
3
  format: json
4
4
  interaction: true
5
+ response: choice
5
6
  ---
6
7
 
7
8
  # Assumptions Clarifier Agent
@@ -22,26 +23,22 @@ Identify implicit assumptions that could impact the project. Consider:
22
23
 
23
24
  **Technical Assumptions:**
24
25
  - Technology stack preferences
25
- - Development environment
26
26
  - Existing infrastructure
27
27
  - Third-party dependencies
28
28
 
29
29
  **Business Assumptions:**
30
30
  - Timeline expectations
31
- - Budget constraints
32
31
  - Team composition/skills
33
- - Stakeholder availability
34
32
 
35
33
  **Domain Assumptions:**
36
34
  - Industry regulations
37
35
  - Compliance requirements
38
- - Domain-specific constraints
39
36
 
40
- If assumptions need validation, ask using the interact format:
41
-
42
- {
43
- "interact": "Please confirm or clarify these assumptions:\n\n1. Technology Stack:\n - A: I have a preferred stack (specify below)\n - B: Use best practices for the project type\n - C: Must integrate with existing system\n\n2. Development Timeline:\n - A: Prototype/MVP focus (speed over polish)\n - B: Production-ready from start\n - C: Iterative releases planned\n\n3. Existing Codebase:\n - A: Starting from scratch\n - B: Building on existing code\n - C: Migrating from legacy system\n\nPlease respond with your choices and details:"
44
- }
37
+ If assumptions need validation, ask ONE question. Example slugs:
38
+ - "assume-stack": Technology stack preference
39
+ - "assume-timeline": Development approach (MVP, production-ready, iterative)
40
+ - "assume-codebase": Starting point (greenfield, existing code, migration)
41
+ - "assume-infra": Infrastructure constraints
45
42
 
46
43
  If assumptions are clear, return:
47
44
 
@@ -2,6 +2,7 @@
2
2
  model: med
3
3
  format: json
4
4
  interaction: true
5
+ response: choice
5
6
  ---
6
7
 
7
8
  # Requirements Clarifier Agent
@@ -23,19 +24,16 @@ Based on the project description and scope, identify requirements that need clar
23
24
  - Core features and user stories
24
25
  - Data models and relationships
25
26
  - User workflows and interactions
26
- - Input/output specifications
27
27
 
28
28
  **Non-Functional Requirements:**
29
29
  - Performance expectations
30
- - Scalability needs
31
- - Reliability/uptime requirements
32
- - Accessibility requirements
30
+ - Scalability and reliability needs
33
31
 
34
- If requirements need clarification, ask using the interact format:
35
-
36
- {
37
- "interact": "Please clarify the following requirements:\n\n1. Data Storage:\n - A: Local storage only\n - B: Cloud database required\n - C: Hybrid (local + cloud sync)\n\n2. Authentication:\n - A: No authentication needed\n - B: Simple username/password\n - C: OAuth/SSO integration\n - D: Multi-factor authentication\n\n[Add more questions as needed]\n\nPlease respond with your choices and details:"
38
- }
32
+ If requirements need clarification, ask ONE question. Example slugs:
33
+ - "req-storage": Data storage approach (local, cloud, hybrid)
34
+ - "req-auth": Authentication method (none, basic, OAuth, MFA)
35
+ - "req-offline": Offline capability needs
36
+ - "req-realtime": Real-time features needed
39
37
 
40
38
  If requirements are clear, return:
41
39
 
@@ -2,6 +2,7 @@
2
2
  model: med
3
3
  format: json
4
4
  interaction: true
5
+ response: choice
5
6
  ---
6
7
 
7
8
  # Scope Clarifier Agent
@@ -23,11 +24,10 @@ Analyze the project description and determine if the scope is clear. Consider:
23
24
  - Platform/environment constraints
24
25
  - Integration requirements
25
26
 
26
- If the scope is unclear or ambiguous, ask clarifying questions using the interact format:
27
-
28
- {
29
- "interact": "Please clarify the following scope questions:\n\n1. Target Platform:\n - A: Web application\n - B: Mobile app\n - C: Desktop application\n - D: API/Backend service\n\n2. User Scale:\n - A: Single user / personal project\n - B: Small team (< 10 users)\n - C: Medium scale (10-1000 users)\n - D: Large scale (1000+ users)\n\n[Add more questions as needed]\n\nPlease respond with your choices (e.g., '1A, 2C') and any additional details:"
30
- }
27
+ If the scope is unclear, ask ONE clarifying question. Example slugs:
28
+ - "scope-platform": Target platform (web, mobile, desktop, API)
29
+ - "scope-scale": User scale (personal, team, enterprise)
30
+ - "scope-integrations": External integrations needed
31
31
 
32
32
  If the scope is sufficiently clear, return the scope summary:
33
33
 
@@ -2,6 +2,7 @@
2
2
  model: med
3
3
  format: json
4
4
  interaction: true
5
+ response: choice
5
6
  ---
6
7
 
7
8
  # Security Clarifier Agent
@@ -24,28 +25,20 @@ Analyze the project for security implications. Consider:
24
25
  **Data Security:**
25
26
  - Sensitive data handling (PII, financial, health)
26
27
  - Data encryption requirements
27
- - Data retention policies
28
28
 
29
29
  **Access Control:**
30
30
  - Authentication requirements
31
31
  - Authorization model
32
- - Role-based access needs
33
32
 
34
33
  **Compliance:**
35
34
  - Regulatory requirements (GDPR, HIPAA, PCI-DSS)
36
- - Industry standards
37
35
  - Audit requirements
38
36
 
39
- **Infrastructure:**
40
- - Network security
41
- - API security
42
- - Deployment security
43
-
44
- If security requirements need clarification, ask using the interact format:
45
-
46
- {
47
- "interact": "Please clarify security requirements:\n\n1. Sensitive Data:\n - A: No sensitive data handled\n - B: Personal information (names, emails)\n - C: Financial data (payments, transactions)\n - D: Health/medical data\n - E: Other regulated data\n\n2. Compliance Requirements:\n - A: No specific compliance needed\n - B: GDPR (EU data protection)\n - C: HIPAA (healthcare)\n - D: PCI-DSS (payment cards)\n - E: SOC2 / enterprise security\n\n3. Authentication Level:\n - A: Basic (username/password)\n - B: Enhanced (MFA, SSO)\n - C: Enterprise (LDAP, SAML)\n\nPlease respond with your choices and details:"
48
- }
37
+ If security requirements need clarification, ask ONE question. Example slugs:
38
+ - "sec-data": Sensitive data types handled (none, PII, financial, health)
39
+ - "sec-compliance": Compliance requirements (GDPR, HIPAA, PCI-DSS, SOC2)
40
+ - "sec-auth": Authentication level (basic, MFA, SSO, enterprise)
41
+ - "sec-audit": Audit/logging requirements
49
42
 
50
43
  If security requirements are clear, return:
51
44
 
@@ -8,6 +8,5 @@ export const config = {
8
8
  gemini: process.env.GEMINI_API_KEY,
9
9
  anthropic: process.env.ANTHROPIC_API_KEY,
10
10
  openai: process.env.OPENAI_API_KEY,
11
- },
12
- remotePath: "TczrLmUecnqZPpPhBTrvU374CGlfzDfINrr0eN0nMgQ",
11
+ }
13
12
  };
@@ -57,13 +57,13 @@ export default async function handler(req, res) {
57
57
 
58
58
  // Track current position for polling new events
59
59
  let lastEventIndex = await getEventsLength(token);
60
+ let pollCount = 0;
60
61
 
61
62
  const pollInterval = setInterval(async () => {
62
63
  try {
63
- // Refresh session TTL
64
- await refreshSession(token);
64
+ pollCount++;
65
65
 
66
- // Check for new events
66
+ // Check for new events (most important, do this every poll)
67
67
  const newLength = await getEventsLength(token);
68
68
 
69
69
  if (newLength > lastEventIndex) {
@@ -82,18 +82,23 @@ export default async function handler(req, res) {
82
82
  lastEventIndex = newLength;
83
83
  }
84
84
 
85
- // Check CLI status
86
- const updatedSession = await getSession(token);
87
- if (updatedSession && updatedSession.cliConnected !== session.cliConnected) {
88
- session.cliConnected = updatedSession.cliConnected;
89
- res.write(`data: ${JSON.stringify({
90
- type: updatedSession.cliConnected ? 'cli_reconnected' : 'cli_disconnected',
91
- })}\n\n`);
85
+ // Only check CLI status and refresh session every 5th poll (~15 seconds)
86
+ // This reduces Redis calls significantly
87
+ if (pollCount % 5 === 0) {
88
+ await refreshSession(token);
89
+
90
+ const updatedSession = await getSession(token);
91
+ if (updatedSession && updatedSession.cliConnected !== session.cliConnected) {
92
+ session.cliConnected = updatedSession.cliConnected;
93
+ res.write(`data: ${JSON.stringify({
94
+ type: updatedSession.cliConnected ? 'cli_reconnected' : 'cli_disconnected',
95
+ })}\n\n`);
96
+ }
92
97
  }
93
98
  } catch (err) {
94
99
  console.error('Error polling events:', err);
95
100
  }
96
- }, 1000); // Poll every 1 second for faster updates
101
+ }, 3000); // Poll every 3 seconds (was 1 second) - 3x reduction
97
102
 
98
103
  // Clean up on client disconnect
99
104
  req.on('close', () => {
@@ -44,14 +44,16 @@ export default async function handler(req, res) {
44
44
  */
45
45
  async function handlePost(req, res) {
46
46
  const body = typeof req.body === 'string' ? JSON.parse(req.body) : req.body;
47
- const { type, sessionToken } = body;
47
+ const { sessionToken } = body;
48
+ // Support both _action (new) and type (legacy) for message routing
49
+ const action = body._action || body.type;
48
50
 
49
51
  if (!sessionToken) {
50
52
  return res.status(400).json({ error: 'Missing sessionToken' });
51
53
  }
52
54
 
53
55
  try {
54
- switch (type) {
56
+ switch (action) {
55
57
  case 'session_init': {
56
58
  const { workflowName, history } = body;
57
59
 
@@ -89,9 +91,9 @@ async function handlePost(req, res) {
89
91
  ...eventData,
90
92
  };
91
93
 
92
- // Remove sessionToken and type from event data
94
+ // Remove routing fields, preserve type (interaction type like 'choice')
93
95
  delete historyEvent.sessionToken;
94
- delete historyEvent.type;
96
+ delete historyEvent._action;
95
97
 
96
98
  // Add to events list (single source of truth)
97
99
  await addEvent(sessionToken, historyEvent);
@@ -125,7 +127,7 @@ async function handlePost(req, res) {
125
127
  }
126
128
 
127
129
  default:
128
- return res.status(400).json({ error: `Unknown message type: ${type}` });
130
+ return res.status(400).json({ error: `Unknown action: ${action}` });
129
131
  }
130
132
  } catch (err) {
131
133
  console.error('Error handling CLI message:', err);
@@ -135,6 +137,7 @@ async function handlePost(req, res) {
135
137
 
136
138
  /**
137
139
  * Handle GET requests - long-poll for interaction responses
140
+ * Uses efficient polling with 5-second intervals (Upstash doesn't support BLPOP)
138
141
  */
139
142
  async function handleGet(req, res) {
140
143
  const { token, timeout = '30000' } = req.query;
@@ -148,16 +151,15 @@ async function handleGet(req, res) {
148
151
  return res.status(404).json({ error: 'Session not found' });
149
152
  }
150
153
 
151
- const timeoutMs = Math.min(parseInt(timeout, 10), 55000); // Max 55s for Vercel
154
+ // Max 50s for Vercel (leave buffer for response)
155
+ const timeoutMs = Math.min(parseInt(timeout, 10), 50000);
152
156
  const channel = KEYS.interactions(token);
153
-
154
- // Check for pending interactions using a list
155
157
  const pendingKey = `${channel}:pending`;
156
158
 
157
159
  try {
158
- // Try to get a pending interaction
159
160
  const startTime = Date.now();
160
161
 
162
+ // Poll every 5 seconds (10 calls per 50s timeout vs 50 calls before)
161
163
  while (Date.now() - startTime < timeoutMs) {
162
164
  const pending = await redis.lpop(pendingKey);
163
165
 
@@ -169,8 +171,8 @@ async function handleGet(req, res) {
169
171
  });
170
172
  }
171
173
 
172
- // Wait before checking again
173
- await new Promise((resolve) => setTimeout(resolve, 1000));
174
+ // Wait 5 seconds before checking again (was 1 second)
175
+ await new Promise((resolve) => setTimeout(resolve, 5000));
174
176
  }
175
177
 
176
178
  // Timeout - no interaction received
@@ -112,13 +112,15 @@ function sendJson(res, status, data) {
112
112
  */
113
113
  async function handleCliPost(req, res) {
114
114
  const body = await parseBody(req);
115
- const { type, sessionToken } = body;
115
+ const { sessionToken } = body;
116
+ // Support both _action (new) and type (legacy) for message routing
117
+ const action = body._action || body.type;
116
118
 
117
119
  if (!sessionToken) {
118
120
  return sendJson(res, 400, { error: 'Missing sessionToken' });
119
121
  }
120
122
 
121
- switch (type) {
123
+ switch (action) {
122
124
  case 'session_init': {
123
125
  const { workflowName, history } = body;
124
126
  createSession(sessionToken, { workflowName, history });
@@ -152,7 +154,7 @@ async function handleCliPost(req, res) {
152
154
  ...eventData,
153
155
  };
154
156
  delete historyEvent.sessionToken;
155
- delete historyEvent.type;
157
+ delete historyEvent._action; // Remove routing field, preserve type (interaction type)
156
158
 
157
159
  // Add to history
158
160
  session.history.unshift(historyEvent);
@@ -179,7 +181,7 @@ async function handleCliPost(req, res) {
179
181
  }
180
182
 
181
183
  default:
182
- return sendJson(res, 400, { error: `Unknown type: ${type}` });
184
+ return sendJson(res, 400, { error: `Unknown action: ${action}` });
183
185
  }
184
186
  }
185
187