@myvillage/cli 1.50.0 → 1.51.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@myvillage/cli",
3
- "version": "1.50.0",
3
+ "version": "1.51.0",
4
4
  "description": "MyVillageOS CLI for community developers",
5
5
  "type": "module",
6
6
  "bin": {
@@ -245,16 +245,38 @@ Guidelines:
245
245
  // "find any README under the workspace" can do
246
246
  // list_allowed_directories → list_directory → search_files → ...
247
247
  // until it's actually done.
248
+ //
249
+ // Anthropic prompt caching: we mark the system block as ephemeral so
250
+ // everything up to and including it (tool schemas + system prompt) is cached.
251
+ // For Anthropic, cached reads are ~10x cheaper than fresh input —
252
+ // critical because MCP tool schemas alone can be 20–40k tokens that
253
+ // would otherwise be re-billed at every step of every loop iteration.
254
+ // The breakpoint is namespaced under `anthropic`, so OpenAI calls
255
+ // silently ignore it. Cache TTL is ~5 min; agents that loop more
256
+ // often than that stay warm and pay the full (cache-write) price only on the first hit.
248
257
  const result = await generateText({
249
258
  model,
250
- system: systemPrompt,
251
- prompt: context,
259
+ messages: [
260
+ {
261
+ role: 'system',
262
+ content: systemPrompt,
263
+ providerOptions: {
264
+ anthropic: { cacheControl: { type: 'ephemeral' } },
265
+ },
266
+ },
267
+ { role: 'user', content: context },
268
+ ],
252
269
  tools,
253
270
  stopWhen: stepCountIs(10),
254
271
  maxOutputTokens: maxTokens,
255
272
  });
256
273
 
257
- // Log LLM response
274
+ // Log LLM response. Cache metrics come from the Anthropic provider
275
+ // metadata — surfacing them lets `agent logs` show whether caching is
276
+ // landing. `cacheReadInputTokens` should grow on the 2nd+ iteration in
277
+ // a 5-min window; if it stays 0, the prefix isn't matching (usually
278
+ // because something upstream of the breakpoint is varying per call).
279
+ const anthropicMeta = result.providerMetadata?.anthropic || {};
258
280
  logActivity(agentDir, {
259
281
  type: 'llm_response',
260
282
  text: (result.text || '').slice(0, 500),
@@ -262,6 +284,8 @@ Guidelines:
262
284
  prompt: result.usage?.inputTokens || 0,
263
285
  completion: result.usage?.outputTokens || 0,
264
286
  total: result.usage?.totalTokens || 0,
287
+ cacheCreate: anthropicMeta.cacheCreationInputTokens || 0,
288
+ cacheRead: anthropicMeta.cacheReadInputTokens || 0,
265
289
  },
266
290
  });
267
291