@myvillage/cli 1.17.0 → 1.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@myvillage/cli",
3
- "version": "1.17.0",
3
+ "version": "1.18.0",
4
4
  "description": "MyVillageOS CLI for community developers",
5
5
  "type": "module",
6
6
  "bin": {
@@ -110,6 +110,14 @@ export async function agentLoop(agentName, { signal }) {
110
110
  let mentionsFound = 0;
111
111
  // Hoisted so the catch block can mark in-flight tasks FAILED.
112
112
  let activeTask = null;
113
+ // Tracks whether the task's tool calls actually succeeded. The LLM
114
+ // sometimes "summarizes" a tool error into a falsely-confident final
115
+ // response — we don't want to trust the model's word about success.
116
+ const taskActionAudit = {
117
+ actionToolsCalled: 0,
118
+ actionToolsSucceeded: 0,
119
+ toolErrors: [], // { tool, message }
120
+ };
113
121
 
114
122
  try {
115
123
  // Read prompt.md fresh each iteration (villager may have edited it)
@@ -178,7 +186,8 @@ export async function agentLoop(agentName, { signal }) {
178
186
  },
179
187
  });
180
188
 
181
- // Log tool calls and count activity
189
+ // Log tool calls and count activity. Also audit action-tool success
190
+ // so we don't trust the model's final text about whether a task worked.
182
191
  if (result.steps?.length) {
183
192
  for (const step of result.steps) {
184
193
  if (step.toolCalls?.length) {
@@ -193,19 +202,25 @@ export async function agentLoop(agentName, { signal }) {
193
202
  for (let i = 0; i < step.toolResults.length; i++) {
194
203
  const tr = step.toolResults[i];
195
204
  const args = step.toolCalls[i]?.args;
205
+ const errored = isToolResultError(tr);
206
+ auditToolCall(taskActionAudit, tr.toolName, errored, tr);
196
207
  logActivity(agentDir, {
197
208
  type: 'tool_call',
198
209
  tool: tr.toolName,
199
210
  args,
200
- result: typeof tr.result === 'string' ? tr.result.slice(0, 200) : 'ok',
211
+ result: summarizeToolResult(tr),
212
+ ok: !errored,
201
213
  });
202
214
  }
203
215
  } else if (step.toolResults?.length) {
204
216
  for (const tr of step.toolResults) {
217
+ const errored = isToolResultError(tr);
218
+ auditToolCall(taskActionAudit, tr.toolName, errored, tr);
205
219
  logActivity(agentDir, {
206
220
  type: 'tool_call',
207
221
  tool: tr.toolName,
208
- result: typeof tr.result === 'string' ? tr.result.slice(0, 200) : 'ok',
222
+ result: summarizeToolResult(tr),
223
+ ok: !errored,
209
224
  });
210
225
  }
211
226
  }
@@ -216,6 +231,7 @@ export async function agentLoop(agentName, { signal }) {
216
231
  if (tc.toolName === 'post_create') activity.postsCreated++;
217
232
  if (tc.toolName === 'comment_create') activity.commentsCreated++;
218
233
  if (tc.toolName === 'vote_cast') activity.votesGiven++;
234
+ // No paired result here — assume executed, can't audit.
219
235
  logActivity(agentDir, {
220
236
  type: 'tool_call',
221
237
  tool: tc.toolName,
@@ -249,15 +265,48 @@ export async function agentLoop(agentName, { signal }) {
249
265
  // Keep only last 50 actions to bound memory
250
266
  if (recentActions.length > 50) recentActions.splice(0, recentActions.length - 50);
251
267
 
252
- // If a task was being processed, mark it complete with the model's output.
268
+ // If a task was being processed, decide success vs. failure based on
269
+ // whether the action tools actually succeeded — not on the model's
270
+ // self-report. The LLM sometimes claims "I posted!" after a tool error.
253
271
  if (activeTask && config.man?.village_agent_id) {
272
+ const shouldFail =
273
+ taskActionAudit.actionToolsCalled > 0 &&
274
+ taskActionAudit.actionToolsSucceeded === 0;
275
+
254
276
  try {
255
- await completeAgentTask(config.man.village_agent_id, activeTask.id, {
256
- output: { text: result.text || '', toolCalls: activity.toolCalls },
257
- tokensUsed: (result.usage?.promptTokens || 0) + (result.usage?.completionTokens || 0),
258
- durationMs: Date.now() - loopStart,
259
- });
260
- logActivity(agentDir, { type: 'task_completed', taskId: activeTask.id });
277
+ if (shouldFail) {
278
+ const firstError = taskActionAudit.toolErrors[0];
279
+ const errorMessage = firstError
280
+ ? `${firstError.tool} failed: ${firstError.message}`
281
+ : 'Action tools called but all failed';
282
+ await completeAgentTask(config.man.village_agent_id, activeTask.id, {
283
+ errorMessage,
284
+ output: {
285
+ text: result.text || '',
286
+ toolCalls: activity.toolCalls,
287
+ toolErrors: taskActionAudit.toolErrors,
288
+ note: 'Marked FAILED because the action tools did not succeed. The model\'s text may claim success but the underlying tool calls errored.',
289
+ },
290
+ tokensUsed: (result.usage?.promptTokens || 0) + (result.usage?.completionTokens || 0),
291
+ durationMs: Date.now() - loopStart,
292
+ });
293
+ logActivity(agentDir, {
294
+ type: 'task_failed',
295
+ taskId: activeTask.id,
296
+ reason: errorMessage,
297
+ });
298
+ } else {
299
+ await completeAgentTask(config.man.village_agent_id, activeTask.id, {
300
+ output: {
301
+ text: result.text || '',
302
+ toolCalls: activity.toolCalls,
303
+ toolErrors: taskActionAudit.toolErrors.length > 0 ? taskActionAudit.toolErrors : undefined,
304
+ },
305
+ tokensUsed: (result.usage?.promptTokens || 0) + (result.usage?.completionTokens || 0),
306
+ durationMs: Date.now() - loopStart,
307
+ });
308
+ logActivity(agentDir, { type: 'task_completed', taskId: activeTask.id });
309
+ }
261
310
  } catch (taskErr) {
262
311
  logActivity(agentDir, { type: 'error', error: `Failed to mark task complete: ${taskErr.message}` });
263
312
  }
@@ -343,6 +392,88 @@ function updateHeartbeat(agentDir) {
343
392
  }
344
393
  }
345
394
 
395
+ // ── Tool result auditing ───────────────────────────────────────────
396
+ // The Vercel AI SDK returns tool results in a few different shapes
397
+ // depending on the underlying transport. These helpers normalise them
398
+ // so we can detect errors regardless of which path is in play.
399
+
400
// Names of the tools that perform a real, externally visible action on the
// platform. Membership in this set is what makes a tool call count toward a
// task's action audit, which in turn decides whether a task that ran but did
// not actually succeed (e.g. a 404 from post_create) is marked FAILED.
const ACTION_TOOLS = new Set([
  // Feed content
  'post_create', 'comment_create', 'vote_cast', 'knowledge_submit',
  // Community membership
  'community_join', 'community_leave',
  // Community events
  'community_event_create', 'community_event_register',
  'community_event_unregister', 'community_event_cancel',
  // Moments and pulses
  'moment_create', 'pulse_create',
  // Agent community membership
  'agent_join_community', 'agent_leave_community',
  // Wallet
  'wallet_send', 'wallet_tip',
  // Wisdom
  'wisdom_import',
  // Task management
  'task_assign', 'task_complete', 'task_retry',
]);
425
+
426
// Collapse a tool result into one plain-text string so callers can search,
// truncate, or log it without caring which shape the result arrived in.
//
// Shapes of `tr.result` that are handled:
//   - string                      -> returned unchanged
//   - object with `content` array -> string items and each item's `.text`
//                                    are joined with single spaces; empty
//                                    pieces are dropped
//   - anything else               -> JSON-serialised
//
// Always returns a string. A missing `tr`, a null/undefined result, or a
// value that cannot be serialised yields '' so callers can safely call
// string methods (e.g. `.slice`) on the return value.
function flattenToolResultText(tr) {
  const payload = tr?.result;
  if (payload == null) return '';
  if (typeof payload === 'string') return payload;

  if (Array.isArray(payload.content)) {
    return payload.content
      .map((part) => (typeof part === 'string' ? part : part?.text || ''))
      .filter(Boolean)
      .join(' ');
  }

  try {
    // JSON.stringify returns undefined (not a string) for values such as
    // functions and symbols, so fall back to '' to keep the return type stable.
    return JSON.stringify(payload) ?? '';
  } catch {
    // Circular structures and BigInt values make JSON.stringify throw.
    return '';
  }
}
438
+
439
// Decide whether a tool result represents a failed call.
//
// Checks, in order:
//   1. Explicit MCP / Vercel AI SDK error flags: `tr.isError`,
//      `tr.result.isError`, or any `tr.result.content[]` item with `isError`.
//   2. Heuristics over the flattened result text:
//      - an HTTP 4xx/5xx status code, but only when it starts the text
//        ("500 Internal Server Error") or directly follows a label such as
//        "status", "status code", "HTTP", "error" or "code". A bare number
//        inside a successful payload (e.g. a count of 404) is not treated
//        as an error.
//      - one of a few well-known failure phrases, matched as whole words.
//
// Returns false for a missing result or one with no text. The phrase check is
// a heuristic: a successful result whose text happens to contain one of the
// phrases (for example quoted user content saying "invalid") is still flagged.
function isToolResultError(tr) {
  if (!tr) return false;

  const payload = tr.result;
  if (tr.isError === true || payload?.isError === true) return true;
  if (Array.isArray(payload?.content) && payload.content.some((part) => part?.isError === true)) {
    return true;
  }

  const text = flattenToolResultText(tr);
  if (!text) return false;

  // Status code at the very start of the text, e.g. "404 Not Found".
  const leadingStatus = /^\s*[45]\d{2}\b/;
  // Status code introduced by a label, allowing up to three separator
  // characters so both prose ("status code 429") and JSON ('"status":429')
  // forms are recognised.
  const labelledStatus = /\b(?:status(?:\s*code)?|http(?:\/\d(?:\.\d)?)?|error|code)\W{0,3}[45]\d{2}\b/i;
  const failurePhrase = /\b(not found|unauthorized|forbidden|invalid|insufficient_quota|authentication failed)\b/i;

  return leadingStatus.test(text) || labelledStatus.test(text) || failurePhrase.test(text);
}
455
+
456
// Record the outcome of one tool call in the per-task audit object.
//
//   audit    - mutated in place; expected to carry `actionToolsCalled`,
//              `actionToolsSucceeded` (counters) and `toolErrors` (array)
//   toolName - name of the tool that was called
//   errored  - whether the call was classified as failed
//   tr       - the raw tool result, used only to build the error message
//
// Only tools listed in ACTION_TOOLS affect the counters; every errored call,
// action tool or not, is appended to `toolErrors` with a message capped at
// 300 characters.
function auditToolCall(audit, toolName, errored, tr) {
  if (ACTION_TOOLS.has(toolName)) {
    audit.actionToolsCalled += 1;
    if (!errored) audit.actionToolsSucceeded += 1;
  }

  if (!errored) return;

  // An errored result may carry no result payload at all (e.g. only an
  // `isError` flag), in which case the flattened text can be missing. Guard
  // the slice so the audit itself never throws while recording a failure.
  const text = flattenToolResultText(tr);
  const message = (typeof text === 'string' ? text.slice(0, 300) : '') || 'unknown error';
  audit.toolErrors.push({ tool: toolName, message });
}
470
+
471
// Build the short `result` string stored with a tool_call activity entry:
// the flattened result text capped at 200 characters, or 'ok' when the
// result has no text at all.
function summarizeToolResult(tr) {
  const flattened = flattenToolResultText(tr);
  return flattened ? flattened.slice(0, 200) : 'ok';
}
476
+
346
477
  // Pull up to 5 pending tasks and claim the first one we can win the race for.
347
478
  // Returns the claimed task or null. Errors are swallowed and logged — the loop
348
479
  // should keep running on transient backend issues.
@@ -24,6 +24,8 @@ import {
24
24
  deleteAgentMemoryEntry,
25
25
  listKnowledgeFiltered,
26
26
  shareKnowledgeAsAgent,
27
+ retryAgentTask,
28
+ retryFailedAgentTasks,
27
29
  } from '../utils/api.js';
28
30
  import {
29
31
  getAgentDir,
@@ -880,6 +882,45 @@ export async function reportTaskOutcome(villageAgentId, taskId, outcome) {
880
882
  return completeAgentTask(villageAgentId, taskId, outcome);
881
883
  }
882
884
 
885
// CLI handler for `myvillage agent task-retry <name> <taskId>`.
//
// Requires an authenticated session, resolves the named agent to its village
// agent id, then asks the platform to retry the given task. Every outcome
// (not logged in, missing task id, success, API failure) is reported on
// stdout; nothing is thrown to the caller for API errors.
export async function agentTaskRetryCommand(name, taskId) {
  if (!isAuthenticated()) {
    console.log(chalk.red(' ✗ Authentication required. Run \'myvillage login\' first.'));
    return;
  }

  const agentId = resolveVillageAgentId(name);
  // Nothing is printed here; the function simply stops when no id comes back.
  if (!agentId) return;

  if (!taskId) {
    console.log(chalk.red(' ✗ Usage: myvillage agent task-retry <name> <taskId>\n'));
    return;
  }

  try {
    await retryAgentTask(agentId, taskId);
    const successLine = ` ✓ Task ${taskId} reset to PENDING. Agent will re-claim on next poll.\n`;
    console.log(brand.green(successLine));
  } catch (err) {
    // Prefer the API's own error text when the response carries one.
    const reason = err.response?.data?.error || err.message;
    console.log(chalk.red(` ✗ Retry failed: ${reason}\n`));
  }
}
905
+
906
// CLI handler for `myvillage agent task-retry-failed <name> [--filter <text>]`.
//
// Requires an authenticated session, resolves the named agent to its village
// agent id, then asks the platform to retry the agent's failed tasks,
// optionally restricted by `options.filter`. The number of retried tasks is
// read from the response's `retried` field and reported on stdout; API errors
// are reported on stdout rather than thrown.
export async function agentTaskRetryFailedCommand(name, options = {}) {
  if (!isAuthenticated()) {
    console.log(chalk.red(' ✗ Authentication required. Run \'myvillage login\' first.'));
    return;
  }
  const villageAgentId = resolveVillageAgentId(name);
  if (!villageAgentId) return;

  try {
    const result = await retryFailedAgentTasks(villageAgentId, options.filter);
    // The request succeeded if we got here. A response without a body or
    // without a `retried` count must not be reported as a failure (reading
    // `.retried` off undefined would land in the catch below) nor printed as
    // "undefined task(s)".
    const retriedCount = result?.retried ?? 0;
    const filterNote = options.filter ? chalk.dim(` (filter: ${options.filter})`) : '';
    console.log(brand.green(` ✓ ${retriedCount} task(s) reset to PENDING${filterNote}.\n`));
  } catch (err) {
    // Prefer the API's own error text when the response carries one.
    const msg = err.response?.data?.error || err.message;
    console.log(chalk.red(` ✗ Bulk retry failed: ${msg}\n`));
  }
}
923
+
883
924
  // ── Memory (short-term KV) and Recall (long-term searchable) ────
884
925
 
885
926
  function resolveAgentProfileId(name) {
package/src/index.js CHANGED
@@ -53,6 +53,8 @@ import {
53
53
  agentRemoveToolCommand,
54
54
  agentTaskListCommand,
55
55
  agentTaskAssignCommand,
56
+ agentTaskRetryCommand,
57
+ agentTaskRetryFailedCommand,
56
58
  agentMemoryCommand,
57
59
  agentRecallCommand,
58
60
  agentRememberCommand,
@@ -455,6 +457,17 @@ export function run() {
455
457
  .option('--priority <n>', 'Priority 1-10 (lower runs first)', '5')
456
458
  .action(agentTaskAssignCommand);
457
459
 
460
+ agentCmd
461
+ .command('task-retry <name> <taskId>')
462
+ .description('Reset a FAILED or CANCELLED task back to PENDING so the agent retries it')
463
+ .action(agentTaskRetryCommand);
464
+
465
+ agentCmd
466
+ .command('task-retry-failed <name>')
467
+ .description('Bulk-reset every FAILED task for an agent back to PENDING')
468
+ .option('--filter <text>', 'Only retry tasks whose errorMessage contains this substring')
469
+ .action(agentTaskRetryFailedCommand);
470
+
458
471
  // Agent memory (short-term KV state) and recall (long-term searchable memory)
459
472
  agentCmd
460
473
  .command('memory <name> <action> [args...]')
@@ -138,6 +138,14 @@ ${description}
138
138
  - Speaks casually but clearly
139
139
  - Concise in responses
140
140
 
141
+ ## Handling tasks
142
+ When you receive a TASK in your context, follow these rules:
143
+
144
+ - **Use the values from the task input verbatim.** If the task input is JSON like \`{"communitySlug":"general"}\`, call the tool with \`communitySlug: "general"\` exactly. Do not substitute, translate, or invent slugs, IDs, or names.
145
+ - **If a required value is missing, do NOT guess.** Reply in your final text that the task is missing required information (e.g. "Task is missing a communitySlug — cannot proceed"). Don't pick a community at random.
146
+ - **If a tool call fails, do NOT claim success.** Report what failed and why in your final text. The platform decides whether the task is FAILED based on whether the tools actually succeeded — making up a success message hides the real error.
147
+ - **Use the platform's communities you already belong to.** If you don't know a community exists, use \`community_view\` to check before posting.
148
+
141
149
  ## Boundaries
142
150
  - Never share personal files or private data to the feed
143
151
  - Ask before posting anything longer than 2 sentences
package/src/utils/api.js CHANGED
@@ -679,6 +679,28 @@ export async function completeAgentTask(villageAgentId, taskId, data = {}) {
679
679
  return response.data;
680
680
  }
681
681
 
682
// Ask the platform to retry one FAILED or CANCELLED task, i.e. reset it to
// PENDING so the agent daemon picks it up again on its next polling
// iteration. Both ids are URL-encoded before being placed in the path.
// Resolves with the response body.
export async function retryAgentTask(villageAgentId, taskId) {
  const client = getPlatformClient();
  const agentSegment = encodeURIComponent(villageAgentId);
  const taskSegment = encodeURIComponent(taskId);
  const { data } = await client.post(
    `/village-agents/${agentSegment}/tasks/${taskSegment}/retry`,
  );
  return data;
}
691
+
692
// Ask the platform to retry every FAILED task belonging to an agent.
// When `errorPattern` is truthy it is sent in the request body as a filter on
// the tasks' errorMessage (substring, case-insensitive); otherwise an empty
// body is posted. Resolves with the response body.
export async function retryFailedAgentTasks(villageAgentId, errorPattern) {
  const client = getPlatformClient();
  const payload = {};
  if (errorPattern) {
    payload.errorPattern = errorPattern;
  }
  const endpoint = `/village-agents/${encodeURIComponent(villageAgentId)}/tasks/retry-failed`;
  const { data } = await client.post(endpoint, payload);
  return data;
}
703
+
682
704
  // ── Wisdom (VillageBooks repurposed as agent skill packs) ──────────
683
705
 
684
706
  export async function listVillageBooks(params = {}) {