@myvillage/cli 1.50.0 → 1.51.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/agent-runtime/loop.js +27 -3
package/package.json
CHANGED
|
@@ -245,16 +245,38 @@ Guidelines:
|
|
|
245
245
|
// "find any README under the workspace" can do
|
|
246
246
|
// list_allowed_directories → list_directory → search_files → ...
|
|
247
247
|
// until it's actually done.
|
|
248
|
+
//
|
|
249
|
+
// Anthropic prompt caching: we mark the system block as ephemeral so
|
|
250
|
+
// everything up to and including it (tool schemas + system prompt) is cached.
|
|
251
|
+
// For Anthropic, cached reads are ~10x cheaper than fresh input —
|
|
252
|
+
// critical because MCP tool schemas alone can be 20–40k tokens that
|
|
253
|
+
// would otherwise be re-billed at every step of every loop iteration.
|
|
254
|
+
// The breakpoint is namespaced under `anthropic`, so OpenAI calls
|
|
255
|
+
// silently ignore it. Cache TTL is ~5 min; agents that loop more
|
|
256
|
+
// often than that stay warm and pay the cache-write price only on the first hit.
|
|
248
257
|
const result = await generateText({
|
|
249
258
|
model,
|
|
250
|
-
|
|
251
|
-
|
|
259
|
+
messages: [
|
|
260
|
+
{
|
|
261
|
+
role: 'system',
|
|
262
|
+
content: systemPrompt,
|
|
263
|
+
providerOptions: {
|
|
264
|
+
anthropic: { cacheControl: { type: 'ephemeral' } },
|
|
265
|
+
},
|
|
266
|
+
},
|
|
267
|
+
{ role: 'user', content: context },
|
|
268
|
+
],
|
|
252
269
|
tools,
|
|
253
270
|
stopWhen: stepCountIs(10),
|
|
254
271
|
maxOutputTokens: maxTokens,
|
|
255
272
|
});
|
|
256
273
|
|
|
257
|
-
// Log LLM response
|
|
274
|
+
// Log LLM response. Cache metrics come from the Anthropic provider
|
|
275
|
+
// metadata — surfacing them lets `agent logs` show whether caching is
|
|
276
|
+
// landing. `cacheReadInputTokens` should grow on the 2nd+ iteration in
|
|
277
|
+
// a 5-min window; if it stays 0, the prefix isn't matching (usually
|
|
278
|
+
// because something upstream of the breakpoint is varying per call).
|
|
279
|
+
const anthropicMeta = result.providerMetadata?.anthropic || {};
|
|
258
280
|
logActivity(agentDir, {
|
|
259
281
|
type: 'llm_response',
|
|
260
282
|
text: (result.text || '').slice(0, 500),
|
|
@@ -262,6 +284,8 @@ Guidelines:
|
|
|
262
284
|
prompt: result.usage?.inputTokens || 0,
|
|
263
285
|
completion: result.usage?.outputTokens || 0,
|
|
264
286
|
total: result.usage?.totalTokens || 0,
|
|
287
|
+
cacheCreate: anthropicMeta.cacheCreationInputTokens || 0,
|
|
288
|
+
cacheRead: anthropicMeta.cacheReadInputTokens || 0,
|
|
265
289
|
},
|
|
266
290
|
});
|
|
267
291
|
|