npm - @myvillage/cli - Versions diffs - 1.50.0 → 1.51.0 - Mend

@myvillage/cli 1.50.0 → 1.51.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2) hide show

package/package.json +1 -1
package/src/agent-runtime/loop.js +27 -3

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@myvillage/cli",
-  "version": "1.50.0",
+  "version": "1.51.0",
   "description": "MyVillageOS CLI for community developers",
   "type": "module",
   "bin": {

package/src/agent-runtime/loop.js CHANGED Viewed

@@ -245,16 +245,38 @@ Guidelines:
       // "find any README under the workspace" can do
       // list_allowed_directories → list_directory → search_files → ...
       // until it's actually done.
+      //
+      // Anthropic prompt caching: we mark the system block as ephemeral so
+      // everything up to and including it (tool schemas + system prompt) is cached.
+      // For Anthropic, cached reads are ~10x cheaper than fresh input —
+      // critical because MCP tool schemas alone can be 20–40k tokens that
+      // would otherwise be re-billed at every step of every loop iteration.
+      // The breakpoint is namespaced under `anthropic`, so OpenAI calls
+      // silently ignore it. Cache TTL is ~5 min; agents that loop more
+      // often than that stay warm and pay full price only on the first hit.
       const result = await generateText({
         model,
-        system: systemPrompt,
-        prompt: context,
+        messages: [
+          {
+            role: 'system',
+            content: systemPrompt,
+            providerOptions: {
+              anthropic: { cacheControl: { type: 'ephemeral' } },
+            },
+          },
+          { role: 'user', content: context },
+        ],
         tools,
         stopWhen: stepCountIs(10),
         maxOutputTokens: maxTokens,
       });
-      // Log LLM response
+      // Log LLM response. Cache metrics come from the Anthropic provider
+      // metadata — surfacing them lets `agent logs` show whether caching is
+      // landing. `cacheReadInputTokens` should grow on the 2nd+ iteration in
+      // a 5-min window; if it stays 0, the prefix isn't matching (usually
+      // because something upstream of the breakpoint is varying per call).
+      const anthropicMeta = result.providerMetadata?.anthropic || {};
       logActivity(agentDir, {
         type: 'llm_response',
         text: (result.text || '').slice(0, 500),
@@ -262,6 +284,8 @@ Guidelines:
           prompt: result.usage?.inputTokens || 0,
           completion: result.usage?.outputTokens || 0,
           total: result.usage?.totalTokens || 0,
+          cacheCreate: anthropicMeta.cacheCreationInputTokens || 0,
+          cacheRead: anthropicMeta.cacheReadInputTokens || 0,
         },
       });