npm - @myvillage/cli - Versions diffs - 1.17.0 → 1.18.0 - Mend

@myvillage/cli 1.17.0 → 1.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

package/package.json +1 -1
package/src/agent-runtime/loop.js +141 -10
package/src/commands/agent-local.js +41 -0
package/src/index.js +13 -0
package/src/utils/agent-scaffolder.js +8 -0
package/src/utils/api.js +22 -0

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@myvillage/cli",
-  "version": "1.17.0",
+  "version": "1.18.0",
   "description": "MyVillageOS CLI for community developers",
   "type": "module",
   "bin": {

package/src/agent-runtime/loop.js CHANGED Viewed

@@ -110,6 +110,14 @@ export async function agentLoop(agentName, { signal }) {
     let mentionsFound = 0;
     // Hoisted so the catch block can mark in-flight tasks FAILED.
     let activeTask = null;
+    // Tracks whether the task's tool calls actually succeeded. The LLM
+    // sometimes "summarizes" a tool error into a falsely-confident final
+    // response — we don't want to trust the model's word about success.
+    const taskActionAudit = {
+      actionToolsCalled: 0,
+      actionToolsSucceeded: 0,
+      toolErrors: [], // { tool, message }
+    };
     try {
       // Read prompt.md fresh each iteration (villager may have edited it)
@@ -178,7 +186,8 @@ export async function agentLoop(agentName, { signal }) {
         },
       });
-      // Log tool calls and count activity
+      // Log tool calls and count activity. Also audit action-tool success
+      // so we don't trust the model's final text about whether a task worked.
       if (result.steps?.length) {
         for (const step of result.steps) {
           if (step.toolCalls?.length) {
@@ -193,19 +202,25 @@ export async function agentLoop(agentName, { signal }) {
             for (let i = 0; i < step.toolResults.length; i++) {
               const tr = step.toolResults[i];
               const args = step.toolCalls[i]?.args;
+              const errored = isToolResultError(tr);
+              auditToolCall(taskActionAudit, tr.toolName, errored, tr);
               logActivity(agentDir, {
                 type: 'tool_call',
                 tool: tr.toolName,
                 args,
-                result: typeof tr.result === 'string' ? tr.result.slice(0, 200) : 'ok',
+                result: summarizeToolResult(tr),
+                ok: !errored,
               });
             }
           } else if (step.toolResults?.length) {
             for (const tr of step.toolResults) {
+              const errored = isToolResultError(tr);
+              auditToolCall(taskActionAudit, tr.toolName, errored, tr);
               logActivity(agentDir, {
                 type: 'tool_call',
                 tool: tr.toolName,
-                result: typeof tr.result === 'string' ? tr.result.slice(0, 200) : 'ok',
+                result: summarizeToolResult(tr),
+                ok: !errored,
               });
             }
           }
@@ -216,6 +231,7 @@ export async function agentLoop(agentName, { signal }) {
           if (tc.toolName === 'post_create') activity.postsCreated++;
           if (tc.toolName === 'comment_create') activity.commentsCreated++;
           if (tc.toolName === 'vote_cast') activity.votesGiven++;
+          // No paired result here — assume executed, can't audit.
           logActivity(agentDir, {
             type: 'tool_call',
             tool: tc.toolName,
@@ -249,15 +265,48 @@ export async function agentLoop(agentName, { signal }) {
       // Keep only last 50 actions to bound memory
       if (recentActions.length > 50) recentActions.splice(0, recentActions.length - 50);
-      // If a task was being processed, mark it complete with the model's output.
+      // If a task was being processed, decide success vs. failure based on
+      // whether the action tools actually succeeded — not on the model's
+      // self-report. The LLM sometimes claims "I posted!" after a tool error.
       if (activeTask && config.man?.village_agent_id) {
+        const shouldFail =
+          taskActionAudit.actionToolsCalled > 0 &&
+          taskActionAudit.actionToolsSucceeded === 0;
         try {
-          await completeAgentTask(config.man.village_agent_id, activeTask.id, {
-            output: { text: result.text || '', toolCalls: activity.toolCalls },
-            tokensUsed: (result.usage?.promptTokens || 0) + (result.usage?.completionTokens || 0),
-            durationMs: Date.now() - loopStart,
-          });
-          logActivity(agentDir, { type: 'task_completed', taskId: activeTask.id });
+          if (shouldFail) {
+            const firstError = taskActionAudit.toolErrors[0];
+            const errorMessage = firstError
+              ? `${firstError.tool} failed: ${firstError.message}`
+              : 'Action tools called but all failed';
+            await completeAgentTask(config.man.village_agent_id, activeTask.id, {
+              errorMessage,
+              output: {
+                text: result.text || '',
+                toolCalls: activity.toolCalls,
+                toolErrors: taskActionAudit.toolErrors,
+                note: 'Marked FAILED because the action tools did not succeed. The model\'s text may claim success but the underlying tool calls errored.',
+              },
+              tokensUsed: (result.usage?.promptTokens || 0) + (result.usage?.completionTokens || 0),
+              durationMs: Date.now() - loopStart,
+            });
+            logActivity(agentDir, {
+              type: 'task_failed',
+              taskId: activeTask.id,
+              reason: errorMessage,
+            });
+          } else {
+            await completeAgentTask(config.man.village_agent_id, activeTask.id, {
+              output: {
+                text: result.text || '',
+                toolCalls: activity.toolCalls,
+                toolErrors: taskActionAudit.toolErrors.length > 0 ? taskActionAudit.toolErrors : undefined,
+              },
+              tokensUsed: (result.usage?.promptTokens || 0) + (result.usage?.completionTokens || 0),
+              durationMs: Date.now() - loopStart,
+            });
+            logActivity(agentDir, { type: 'task_completed', taskId: activeTask.id });
+          }
         } catch (taskErr) {
           logActivity(agentDir, { type: 'error', error: `Failed to mark task complete: ${taskErr.message}` });
         }
@@ -343,6 +392,88 @@ function updateHeartbeat(agentDir) {
   }
 }
+// ── Tool result auditing ───────────────────────────────────────────
+// The Vercel AI SDK returns tool results in a few different shapes
+// depending on the underlying transport. These helpers normalise them
+// so we can detect errors regardless of which path is in play.
+//
+// ACTION_TOOLS: tools that take a real, externally-visible action on the
+// platform. auditToolCall() counts only these as "action" calls, and the
+// agent loop marks a task FAILED when at least one of them was called and
+// none succeeded (e.g. a 404 from post_create). Errors from tools that are
+// not listed here are still recorded, but do not count toward that decision.
+const ACTION_TOOLS = new Set([
+  'post_create',
+  'comment_create',
+  'vote_cast',
+  'knowledge_submit',
+  'community_join',
+  'community_leave',
+  'community_event_create',
+  'community_event_register',
+  'community_event_unregister',
+  'community_event_cancel',
+  'moment_create',
+  'pulse_create',
+  'agent_join_community',
+  'agent_leave_community',
+  'wallet_send',
+  'wallet_tip',
+  'wisdom_import',
+  'task_assign',
+  'task_complete',
+  'task_retry',
+]);
+// Reduce a tool result to one plain-text string.
+//   tr      – tool result object; only `tr.result` is read.
+//   returns – always a string:
+//             • '' when `tr` is missing or nothing can be serialised,
+//             • the result itself when it is already a string,
+//             • the space-joined string / `.text` parts when the result has
+//               a `content` array,
+//             • otherwise the JSON serialisation of the result.
+function flattenToolResultText(tr) {
+  if (!tr) return '';
+  const r = tr.result;
+  if (typeof r === 'string') return r;
+  if (Array.isArray(r?.content)) {
+    return r.content
+      .map(c => (typeof c === 'string' ? c : c?.text || ''))
+      .filter(Boolean)
+      .join(' ');
+  }
+  // JSON.stringify yields undefined (not a string) for undefined, functions
+  // and symbols, and throws on cycles / BigInt. Callers call .slice() on the
+  // return value, so always hand back a string.
+  try { return JSON.stringify(r) ?? ''; } catch { return ''; }
+}
+// Decide whether a tool result represents a failure.
+// Returns false for a missing result, true when an explicit error flag is
+// set, and otherwise falls back to a text heuristic on the flattened result.
+function isToolResultError(tr) {
+  if (!tr) return false;
+  // Explicit MCP / Vercel AI SDK error flags
+  if (tr.isError === true) return true;
+  if (tr.result?.isError === true) return true;
+  if (Array.isArray(tr.result?.content) && tr.result.content.some(c => c?.isError === true)) {
+    return true;
+  }
+  // Heuristic fallback: look for HTTP-error status numbers and well-known
+  // failure phrases in the result text.
+  // NOTE(review): this is less conservative than it may look. The word
+  // boundaries only stop matches inside longer tokens; any standalone
+  // 400–409 / 500–509 number (e.g. a count or id in a JSON payload) or a
+  // phrase such as "unauthorized" / "invalid" / "not found" in an otherwise
+  // successful result is still classified as an error. Status codes outside
+  // those two ranges (e.g. 422, 429) are not matched by the numeric pattern.
+  const text = flattenToolResultText(tr);
+  if (!text) return false;
+  return /\b(40[0-9]|50[0-9])\b/.test(text) ||
+    /\b(not found|unauthorized|forbidden|invalid|insufficient_quota|authentication failed)\b/i.test(text);
+}
+// Record one tool call in the per-task audit.
+//   audit    – mutable counters object: { actionToolsCalled,
+//              actionToolsSucceeded, toolErrors }.
+//   toolName – name of the tool that ran.
+//   errored  – whether the caller classified the result as an error.
+//   tr       – the raw tool result, used only to build the error message.
+// Only tools in ACTION_TOOLS affect the called/succeeded counters; errors
+// from any tool are appended to audit.toolErrors.
+function auditToolCall(audit, toolName, errored, tr) {
+  if (ACTION_TOOLS.has(toolName)) {
+    audit.actionToolsCalled++;
+    if (!errored) {
+      audit.actionToolsSucceeded++;
+    }
+  }
+  if (errored) {
+    // Defensive: do not assume the flattened text is a string. An
+    // error-flagged result with no payload must still be recorded rather
+    // than throw on .slice() in the middle of the agent loop.
+    const text = flattenToolResultText(tr);
+    audit.toolErrors.push({
+      tool: toolName,
+      message: (typeof text === 'string' ? text.slice(0, 300) : '') || 'unknown error',
+    });
+  }
+}
+// Short, log-friendly rendering of a tool result: the first 200 characters
+// of its flattened text, or 'ok' when there is no text at all.
+function summarizeToolResult(tr) {
+  const flattened = flattenToolResultText(tr);
+  return flattened ? flattened.slice(0, 200) : 'ok';
+}
 // Pull up to 5 pending tasks and claim the first one we can win the race for.
 // Returns the claimed task or null. Errors are swallowed and logged — the loop
 // should keep running on transient backend issues.

package/src/commands/agent-local.js CHANGED Viewed

@@ -24,6 +24,8 @@ import {
   deleteAgentMemoryEntry,
   listKnowledgeFiltered,
   shareKnowledgeAsAgent,
+  retryAgentTask,
+  retryFailedAgentTasks,
 } from '../utils/api.js';
 import {
   getAgentDir,
@@ -880,6 +882,45 @@ export async function reportTaskOutcome(villageAgentId, taskId, outcome) {
   return completeAgentTask(villageAgentId, taskId, outcome);
 }
+// CLI handler for `agent task-retry <name> <taskId>`.
+// Requires an authenticated session and a resolvable agent name, then calls
+// retryAgentTask() and prints either the success line or the error message
+// (preferring the `error` field of the HTTP response body when present).
+export async function agentTaskRetryCommand(name, taskId) {
+  const loggedIn = isAuthenticated();
+  if (!loggedIn) {
+    console.log(chalk.red('  ✗ Authentication required. Run \'myvillage login\' first.'));
+    return;
+  }
+  const agentId = resolveVillageAgentId(name);
+  if (!agentId) {
+    return;
+  }
+  if (!taskId) {
+    console.log(chalk.red('  ✗ Usage: myvillage agent task-retry <name> <taskId>\n'));
+    return;
+  }
+  try {
+    await retryAgentTask(agentId, taskId);
+    const successLine = `  ✓ Task ${taskId} reset to PENDING. Agent will re-claim on next poll.\n`;
+    console.log(brand.green(successLine));
+  } catch (failure) {
+    const reason = failure.response?.data?.error || failure.message;
+    console.log(chalk.red(`  ✗ Retry failed: ${reason}\n`));
+  }
+}
+// CLI handler for `agent task-retry-failed <name> [--filter <text>]`.
+// Requires an authenticated session and a resolvable agent name, then calls
+// retryFailedAgentTasks() with the optional filter and prints how many tasks
+// the response reports as retried, or the error message on failure.
+export async function agentTaskRetryFailedCommand(name, options = {}) {
+  const loggedIn = isAuthenticated();
+  if (!loggedIn) {
+    console.log(chalk.red('  ✗ Authentication required. Run \'myvillage login\' first.'));
+    return;
+  }
+  const agentId = resolveVillageAgentId(name);
+  if (!agentId) {
+    return;
+  }
+  try {
+    const outcome = await retryFailedAgentTasks(agentId, options.filter);
+    let suffix = '';
+    if (options.filter) {
+      suffix = chalk.dim(` (filter: ${options.filter})`);
+    }
+    console.log(brand.green(`  ✓ ${outcome.retried} task(s) reset to PENDING${suffix}.\n`));
+  } catch (failure) {
+    const reason = failure.response?.data?.error || failure.message;
+    console.log(chalk.red(`  ✗ Bulk retry failed: ${reason}\n`));
+  }
+}
 // ── Memory (short-term KV) and Recall (long-term searchable) ────
 function resolveAgentProfileId(name) {

package/src/index.js CHANGED Viewed

@@ -53,6 +53,8 @@ import {
   agentRemoveToolCommand,
   agentTaskListCommand,
   agentTaskAssignCommand,
+  agentTaskRetryCommand,
+  agentTaskRetryFailedCommand,
   agentMemoryCommand,
   agentRecallCommand,
   agentRememberCommand,
@@ -455,6 +457,17 @@ export function run() {
     .option('--priority <n>', 'Priority 1-10 (lower runs first)', '5')
     .action(agentTaskAssignCommand);
+  agentCmd
+    .command('task-retry <name> <taskId>')
+    .description('Reset a FAILED or CANCELLED task back to PENDING so the agent retries it')
+    .action(agentTaskRetryCommand);
+  agentCmd
+    .command('task-retry-failed <name>')
+    .description('Bulk-reset every FAILED task for an agent back to PENDING')
+    .option('--filter <text>', 'Only retry tasks whose errorMessage contains this substring')
+    .action(agentTaskRetryFailedCommand);
   // Agent memory (short-term KV state) and recall (long-term searchable memory)
   agentCmd
     .command('memory <name> <action> [args...]')

package/src/utils/agent-scaffolder.js CHANGED Viewed

@@ -138,6 +138,14 @@ ${description}
 - Speaks casually but clearly
 - Concise in responses
+## Handling tasks
+When you receive a TASK in your context, follow these rules:
+- **Use the values from the task input verbatim.** If the task input is JSON like \`{"communitySlug":"general"}\`, call the tool with \`communitySlug: "general"\` exactly. Do not substitute, translate, or invent slugs, IDs, or names.
+- **If a required value is missing, do NOT guess.** Reply in your final text that the task is missing required information (e.g. "Task is missing a communitySlug — cannot proceed"). Don't pick a community at random.
+- **If a tool call fails, do NOT claim success.** Report what failed and why in your final text. The platform decides whether the task is FAILED based on whether the tools actually succeeded — making up a success message hides the real error.
+- **Use the platform's communities you already belong to.** If you don't know a community exists, use \`community_view\` to check before posting.
 ## Boundaries
 - Never share personal files or private data to the feed
 - Ask before posting anything longer than 2 sentences

package/src/utils/api.js CHANGED Viewed

@@ -679,6 +679,28 @@ export async function completeAgentTask(villageAgentId, taskId, data = {}) {
   return response.data;
 }
+// Retry a single task: POSTs (with no body) to the task's `/retry` endpoint
+// and returns the response payload. Intended for FAILED or CANCELLED tasks,
+// which the platform resets to PENDING so the agent daemon re-claims them on
+// its next polling iteration.
+export async function retryAgentTask(villageAgentId, taskId) {
+  const platform = getPlatformClient();
+  const agentSegment = encodeURIComponent(villageAgentId);
+  const taskSegment = encodeURIComponent(taskId);
+  const url = `/village-agents/${agentSegment}/tasks/${taskSegment}/retry`;
+  const reply = await platform.post(url);
+  return reply.data;
+}
+// Bulk-retry every FAILED task for an agent: POSTs to `/tasks/retry-failed`
+// and returns the response payload. When `errorPattern` is truthy it is sent
+// as `{ errorPattern }` so the platform can filter by errorMessage substring
+// (case-insensitive); otherwise an empty body is sent.
+export async function retryFailedAgentTasks(villageAgentId, errorPattern) {
+  const platform = getPlatformClient();
+  const payload = {};
+  if (errorPattern) {
+    payload.errorPattern = errorPattern;
+  }
+  const url = `/village-agents/${encodeURIComponent(villageAgentId)}/tasks/retry-failed`;
+  const reply = await platform.post(url, payload);
+  return reply.data;
+}
 // ── Wisdom (VillageBooks repurposed as agent skill packs) ──────────
 export async function listVillageBooks(params = {}) {