@myvillage/cli 1.10.2 → 1.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@myvillage/cli",
3
- "version": "1.10.2",
3
+ "version": "1.18.0",
4
4
  "description": "MyVillageOS CLI for community developers",
5
5
  "type": "module",
6
6
  "bin": {
@@ -12,7 +12,8 @@ import { getMCPTools, cleanupMCPClients } from './mcp-client.js';
12
12
  import { gatherContext } from './context.js';
13
13
  import { isWithinActiveHours, getNextCheckInMs } from './scheduler.js';
14
14
  import { parse as parseYaml } from 'yaml';
15
- import { postAgentHeartbeat } from '../utils/api.js';
15
+ import { postAgentHeartbeat, listAgentTasks, claimAgentTask, completeAgentTask } from '../utils/api.js';
16
+ import { readAgentWisdom } from '../utils/wisdom.js';
16
17
 
17
18
  export async function agentLoop(agentName, { signal }) {
18
19
  const agentDir = join(homedir(), '.myvillage', 'agents', agentName);
@@ -107,19 +108,55 @@ export async function agentLoop(agentName, { signal }) {
107
108
  };
108
109
  let feedItemsRead = 0;
109
110
  let mentionsFound = 0;
111
+ // Hoisted so the catch block can mark in-flight tasks FAILED.
112
+ let activeTask = null;
113
+ // Tracks whether the task's tool calls actually succeeded. The LLM
114
+ // sometimes "summarizes" a tool error into a falsely-confident final
115
+ // response — we don't want to trust the model's word about success.
116
+ const taskActionAudit = {
117
+ actionToolsCalled: 0,
118
+ actionToolsSucceeded: 0,
119
+ toolErrors: [], // { tool, message }
120
+ };
110
121
 
111
122
  try {
112
123
  // Read prompt.md fresh each iteration (villager may have edited it)
113
124
  const promptPath = join(agentDir, 'prompt.md');
114
- const systemPrompt = existsSync(promptPath)
125
+ const basePrompt = existsSync(promptPath)
115
126
  ? readFileSync(promptPath, 'utf-8')
116
127
  : `You are an agent named ${config.display_name || agentName}. Be helpful and concise.`;
117
128
 
129
+ // Append wisdom skills to the system prompt. We inline the full bodies
130
+ // for v1 — agent skill packs are small and this keeps the loop simple.
131
+ // (If they grow large, switch to lazy-load via a `wisdom_load` tool.)
132
+ const wisdom = readAgentWisdom(agentName);
133
+ let systemPrompt = basePrompt;
134
+ if (wisdom.length > 0) {
135
+ const skills = wisdom.map(w => {
136
+ const header = `### Skill: ${w.name}${w.description ? ` — ${w.description}` : ''}${w.trigger ? `\nWhen: ${w.trigger}` : ''}`;
137
+ return `${header}\n\n${w.body.trim()}`;
138
+ }).join('\n\n---\n\n');
139
+ systemPrompt = `${basePrompt}\n\n## Available Skills\n\nThese are skill packs you can apply when the trigger matches the current situation.\n\n${skills}`;
140
+ logActivity(agentDir, { type: 'wisdom_loaded', count: wisdom.length, names: wisdom.map(w => w.name) });
141
+ }
142
+
143
+ // Try to pull an assigned task first. Tasks take priority over ambient
144
+ // feed-monitoring. If nothing is queued, fall through to the default.
145
+ if (config.man?.village_agent_id) {
146
+ activeTask = await pollAndClaim(config.man.village_agent_id, agentDir);
147
+ }
148
+
118
149
  // Gather context (returns { text, mentionsCount })
119
150
  const contextResult = await gatherContext(config, lastCheckIn, recentActions);
120
- const context = contextResult.text;
151
+ let context = contextResult.text;
121
152
  mentionsFound = contextResult.mentionsCount;
122
153
 
154
+ if (activeTask) {
155
+ const taskLine = `TASK ${activeTask.id} (${activeTask.taskType}): ${activeTask.instruction || JSON.stringify(activeTask.input || {})}`;
156
+ context = `${taskLine}\n\n${context}`;
157
+ logActivity(agentDir, { type: 'task_claimed', taskId: activeTask.id, taskType: activeTask.taskType });
158
+ }
159
+
123
160
  // Count feed items from context
124
161
  feedItemsRead = (context.match(/^- @/gm) || []).length;
125
162
 
@@ -149,7 +186,8 @@ export async function agentLoop(agentName, { signal }) {
149
186
  },
150
187
  });
151
188
 
152
- // Log tool calls and count activity
189
+ // Log tool calls and count activity. Also audit action-tool success
190
+ // so we don't trust the model's final text about whether a task worked.
153
191
  if (result.steps?.length) {
154
192
  for (const step of result.steps) {
155
193
  if (step.toolCalls?.length) {
@@ -164,19 +202,25 @@ export async function agentLoop(agentName, { signal }) {
164
202
  for (let i = 0; i < step.toolResults.length; i++) {
165
203
  const tr = step.toolResults[i];
166
204
  const args = step.toolCalls[i]?.args;
205
+ const errored = isToolResultError(tr);
206
+ auditToolCall(taskActionAudit, tr.toolName, errored, tr);
167
207
  logActivity(agentDir, {
168
208
  type: 'tool_call',
169
209
  tool: tr.toolName,
170
210
  args,
171
- result: typeof tr.result === 'string' ? tr.result.slice(0, 200) : 'ok',
211
+ result: summarizeToolResult(tr),
212
+ ok: !errored,
172
213
  });
173
214
  }
174
215
  } else if (step.toolResults?.length) {
175
216
  for (const tr of step.toolResults) {
217
+ const errored = isToolResultError(tr);
218
+ auditToolCall(taskActionAudit, tr.toolName, errored, tr);
176
219
  logActivity(agentDir, {
177
220
  type: 'tool_call',
178
221
  tool: tr.toolName,
179
- result: typeof tr.result === 'string' ? tr.result.slice(0, 200) : 'ok',
222
+ result: summarizeToolResult(tr),
223
+ ok: !errored,
180
224
  });
181
225
  }
182
226
  }
@@ -187,6 +231,7 @@ export async function agentLoop(agentName, { signal }) {
187
231
  if (tc.toolName === 'post_create') activity.postsCreated++;
188
232
  if (tc.toolName === 'comment_create') activity.commentsCreated++;
189
233
  if (tc.toolName === 'vote_cast') activity.votesGiven++;
234
+ // No paired result here — assume executed, can't audit.
190
235
  logActivity(agentDir, {
191
236
  type: 'tool_call',
192
237
  tool: tc.toolName,
@@ -220,6 +265,53 @@ export async function agentLoop(agentName, { signal }) {
220
265
  // Keep only last 50 actions to bound memory
221
266
  if (recentActions.length > 50) recentActions.splice(0, recentActions.length - 50);
222
267
 
268
+ // If a task was being processed, decide success vs. failure based on
269
+ // whether the action tools actually succeeded — not on the model's
270
+ // self-report. The LLM sometimes claims "I posted!" after a tool error.
271
+ if (activeTask && config.man?.village_agent_id) {
272
+ const shouldFail =
273
+ taskActionAudit.actionToolsCalled > 0 &&
274
+ taskActionAudit.actionToolsSucceeded === 0;
275
+
276
+ try {
277
+ if (shouldFail) {
278
+ const firstError = taskActionAudit.toolErrors[0];
279
+ const errorMessage = firstError
280
+ ? `${firstError.tool} failed: ${firstError.message}`
281
+ : 'Action tools called but all failed';
282
+ await completeAgentTask(config.man.village_agent_id, activeTask.id, {
283
+ errorMessage,
284
+ output: {
285
+ text: result.text || '',
286
+ toolCalls: activity.toolCalls,
287
+ toolErrors: taskActionAudit.toolErrors,
288
+ note: 'Marked FAILED because the action tools did not succeed. The model\'s text may claim success but the underlying tool calls errored.',
289
+ },
290
+ tokensUsed: (result.usage?.promptTokens || 0) + (result.usage?.completionTokens || 0),
291
+ durationMs: Date.now() - loopStart,
292
+ });
293
+ logActivity(agentDir, {
294
+ type: 'task_failed',
295
+ taskId: activeTask.id,
296
+ reason: errorMessage,
297
+ });
298
+ } else {
299
+ await completeAgentTask(config.man.village_agent_id, activeTask.id, {
300
+ output: {
301
+ text: result.text || '',
302
+ toolCalls: activity.toolCalls,
303
+ toolErrors: taskActionAudit.toolErrors.length > 0 ? taskActionAudit.toolErrors : undefined,
304
+ },
305
+ tokensUsed: (result.usage?.promptTokens || 0) + (result.usage?.completionTokens || 0),
306
+ durationMs: Date.now() - loopStart,
307
+ });
308
+ logActivity(agentDir, { type: 'task_completed', taskId: activeTask.id });
309
+ }
310
+ } catch (taskErr) {
311
+ logActivity(agentDir, { type: 'error', error: `Failed to mark task complete: ${taskErr.message}` });
312
+ }
313
+ }
314
+
223
315
  // Send server-side heartbeat
224
316
  if (config.man?.agent_id) {
225
317
  try {
@@ -242,6 +334,18 @@ export async function agentLoop(agentName, { signal }) {
242
334
  type: 'error',
243
335
  error: err.message,
244
336
  });
337
+ // If a task was in flight when we crashed, mark it FAILED so it isn't lost
338
+ if (activeTask && config.man?.village_agent_id) {
339
+ try {
340
+ await completeAgentTask(config.man.village_agent_id, activeTask.id, {
341
+ errorMessage: err.message,
342
+ durationMs: Date.now() - loopStart,
343
+ });
344
+ logActivity(agentDir, { type: 'task_failed', taskId: activeTask.id });
345
+ } catch {
346
+ // best-effort
347
+ }
348
+ }
245
349
  }
246
350
 
247
351
  lastCheckIn = new Date().toISOString();
@@ -288,6 +392,111 @@ function updateHeartbeat(agentDir) {
288
392
  }
289
393
  }
290
394
 
395
+ // ── Tool result auditing ───────────────────────────────────────────
396
+ // The Vercel AI SDK returns tool results in a few different shapes
397
+ // depending on the underlying transport. These helpers normalise them
398
+ // so we can detect errors regardless of which path is in play.
399
+
400
// Names of tools that perform a real, externally visible action on the
// platform. Membership in this set is what makes a tool call count toward the
// per-task action audit, which in turn decides whether a task whose action
// tools all errored (e.g. a 404 from post_create) is reported as FAILED.
// The sub-arrays only group names by prefix for readability; flattening keeps
// the original insertion order.
const ACTION_TOOLS = new Set([
  ['post_create', 'comment_create', 'vote_cast', 'knowledge_submit'],
  ['community_join', 'community_leave'],
  [
    'community_event_create',
    'community_event_register',
    'community_event_unregister',
    'community_event_cancel',
  ],
  ['moment_create', 'pulse_create'],
  ['agent_join_community', 'agent_leave_community'],
  ['wallet_send', 'wallet_tip'],
  ['wisdom_import'],
  ['task_assign', 'task_complete', 'task_retry'],
].flat());
425
+
426
/**
 * Flatten a tool result into one plain-text string for logging and error
 * detection.
 *
 * Only `tr.result` is inspected. Three shapes are handled:
 *   - a string, returned unchanged;
 *   - an object with a `content` array, whose string entries and `text`
 *     fields are joined with single spaces (empty entries are dropped);
 *   - anything else, which is JSON-serialised.
 *
 * @param {object} [tr] - Tool result entry.
 * @returns {string} The flattened text. Always a string; `''` when there is
 *   nothing to show or the payload cannot be serialised.
 */
function flattenToolResultText(tr) {
  if (!tr) return '';
  const payload = tr.result;
  if (typeof payload === 'string') return payload;
  if (Array.isArray(payload?.content)) {
    return payload.content
      .map((part) => (typeof part === 'string' ? part : part?.text || ''))
      .filter(Boolean)
      .join(' ');
  }
  try {
    // JSON.stringify yields undefined (not a string) for undefined, functions
    // and symbols. Fall back to '' so callers can safely use string methods
    // such as .slice() on the return value.
    return JSON.stringify(payload) ?? '';
  } catch {
    // Serialisation throws on circular structures and BigInt values.
    return '';
  }
}
438
+
439
/**
 * Decide whether a tool result represents a failure.
 *
 * Explicit flags are checked first: `isError === true` on the entry itself,
 * on `tr.result`, or on any item of `tr.result.content`. If none is set, the
 * flattened result text is scanned with two heuristics: a standalone number
 * in the 400-409 or 500-509 range, or one of a fixed list of failure phrases
 * (case-insensitive, matched on word boundaries).
 *
 * NOTE(review): the text heuristics also match successful payloads that
 * happen to contain such a number or phrase (e.g. an id of 404, or prose
 * containing "invalid"), so a `true` from the fallback is a best guess.
 *
 * @param {object} [tr] - Tool result entry.
 * @returns {boolean} `true` when the result looks like an error.
 */
function isToolResultError(tr) {
  if (!tr) return false;

  // Explicit error flags, outermost first.
  if (tr.isError === true) return true;
  const payload = tr.result;
  if (payload?.isError === true) return true;
  const parts = Array.isArray(payload?.content) ? payload.content : [];
  if (parts.some((part) => part?.isError === true)) return true;

  // Heuristic fallback on the flattened text.
  const flattened = flattenToolResultText(tr);
  if (!flattened) return false;

  const failurePatterns = [
    /\b(40[0-9]|50[0-9])\b/,
    /\b(not found|unauthorized|forbidden|invalid|insufficient_quota|authentication failed)\b/i,
  ];
  return failurePatterns.some((pattern) => pattern.test(flattened));
}
455
+
456
/**
 * Record the outcome of one tool call in the per-task audit.
 *
 * Calls to tools listed in ACTION_TOOLS bump `actionToolsCalled`, and also
 * `actionToolsSucceeded` when the call did not error. Every errored call,
 * action tool or not, appends `{ tool, message }` to `toolErrors`.
 *
 * @param {{actionToolsCalled: number, actionToolsSucceeded: number, toolErrors: Array<{tool: string, message: string}>}} audit
 *   Audit accumulator; mutated in place.
 * @param {string} toolName - Name of the tool that was called.
 * @param {boolean} errored - Whether the call was classified as failed.
 * @param {object} [tr] - Raw tool result, used only to build the error message.
 * @returns {void}
 */
function auditToolCall(audit, toolName, errored, tr) {
  if (ACTION_TOOLS.has(toolName)) {
    audit.actionToolsCalled += 1;
    if (!errored) audit.actionToolsSucceeded += 1;
  }
  if (!errored) return;

  // An errored result may carry no payload at all, in which case the
  // flattener can yield a non-string. Coerce to '' before slicing so the
  // audit itself never throws.
  const detail = flattenToolResultText(tr) || '';
  audit.toolErrors.push({
    tool: toolName,
    message: detail.slice(0, 300) || 'unknown error',
  });
}
470
+
471
/**
 * Produce a short activity-log summary for a tool result.
 *
 * @param {object} [tr] - Tool result entry.
 * @returns {string} The first 200 characters of the flattened result text,
 *   or the literal `'ok'` when there is no text.
 */
function summarizeToolResult(tr) {
  const flattened = flattenToolResultText(tr);
  return flattened ? flattened.slice(0, 200) : 'ok';
}
476
+
477
/**
 * Fetch up to 5 PENDING tasks for the agent and claim the first one whose
 * claim call succeeds.
 *
 * A failed claim (lost race or transient error) is skipped and the next task
 * is tried. A failure of the listing call is written to the activity log and
 * treated as "no task", so the caller's loop keeps running.
 *
 * @param {string} villageAgentId - Agent id passed to the task API helpers.
 * @param {string} agentDir - Agent directory handed to `logActivity`.
 * @returns {Promise<object|null>} The claim response's `data` when present,
 *   otherwise the listed task that was claimed; `null` when nothing was claimed.
 */
async function pollAndClaim(villageAgentId, agentDir) {
  try {
    const result = await listAgentTasks(villageAgentId, { status: 'PENDING', limit: 5 });
    const pending = result.tasks || [];
    for (const task of pending) {
      let claim;
      try {
        claim = await claimAgentTask(villageAgentId, task.id);
      } catch {
        // Race lost (409) or transient — try the next task
        continue;
      }
      // The claim succeeded. An empty response body must not be mistaken for
      // a lost race, otherwise the task would stay claimed but never be
      // processed while the loop goes on to claim another one.
      return claim?.data || task;
    }
    return null;
  } catch (err) {
    logActivity(agentDir, { type: 'error', error: `Task poll failed: ${err.message}` });
    return null;
  }
}
499
+
291
500
  function sleep(ms, signal) {
292
501
  return new Promise((resolve) => {
293
502
  if (signal?.aborted) { resolve(); return; }