@ducci/jarvis 1.0.36 → 1.0.38

package/docs/agent.md CHANGED
@@ -391,6 +391,51 @@ Internal flow summary:
  5. Call the model again with the updated conversation.
  6. Repeat until no tool calls are returned.
 
+ ## Anthropic Provider
+
+ When `config.provider === 'anthropic'`, Jarvis uses the Anthropic SDK directly instead of OpenRouter. `src/server/provider.js` exposes an adapter that converts the OpenAI-compatible interface used throughout the codebase into Anthropic's native API format.
+
+ ### Message Format Conversion
+
+ Key differences from OpenAI format (a simplified sketch follows the list):
+
+ - The `system` message is extracted from `messages[0]` and passed as a separate top-level `system` parameter (array form, to support `cache_control`).
+ - Assistant `tool_calls` are converted to `content` blocks of type `tool_use`.
+ - `role: "tool"` messages are grouped into `role: "user"` messages containing `tool_result` blocks.
+ - Consecutive `user` messages are merged (Anthropic requires strict user/assistant alternation).
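
A simplified sketch of these rules (illustrative only; the function name is hypothetical, and the real adapter in `src/server/provider.js` appears only in fragments further down this diff):

```js
// Sketch: OpenAI-style messages -> Anthropic messages, per the rules above.
function toAnthropicMessages(messages) {
  const out = [];
  for (const m of messages) {
    if (m.role === 'system') continue; // handled separately as the top-level `system` param
    if (m.role === 'tool') {
      // role:"tool" -> user message carrying a tool_result block
      out.push({
        role: 'user',
        content: [{ type: 'tool_result', tool_use_id: m.tool_call_id, content: m.content }],
      });
    } else if (m.role === 'assistant' && m.tool_calls) {
      // assistant tool_calls -> tool_use content blocks
      const blocks = m.content ? [{ type: 'text', text: m.content }] : [];
      for (const tc of m.tool_calls) {
        blocks.push({
          type: 'tool_use',
          id: tc.id,
          name: tc.function.name,
          input: JSON.parse(tc.function.arguments || '{}'),
        });
      }
      out.push({ role: 'assistant', content: blocks });
    } else {
      out.push({ role: m.role, content: [{ type: 'text', text: String(m.content ?? '') }] });
    }
    // merge consecutive user messages to satisfy strict alternation
    if (out.length > 1 && out.at(-1).role === 'user' && out.at(-2).role === 'user') {
      out.at(-2).content.push(...out.pop().content);
    }
  }
  return out;
}
```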
+
+ ### Prompt Caching
+
+ Jarvis enables Anthropic prompt caching to reduce cost and latency on repeated turns. The `extended-cache-ttl-2025-01-13` beta header is sent on every Anthropic request, upgrading the cache TTL from 5 minutes to **1 hour**.
+
+ Two cache breakpoints are set per request (sketched in the example after this list):
+
+ 1. **System prompt** — the full system prompt (with `{{user_info}}` already injected) is sent as an array with `cache_control: { type: "ephemeral" }`. This is the largest static block and benefits most from caching.
+
+ 2. **Tools array** — `cache_control: { type: "ephemeral" }` is added to the last tool definition. Anthropic caches everything up to and including the marked entry, so the entire tools array is cached as a unit.
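
Roughly, the request body ends up shaped like this (a sketch with invented values; `systemPrompt`, `convertedMessages`, and the tool entry stand in for the real ones):

```js
// Sketch: the two ephemeral cache breakpoints on an Anthropic request.
const request = {
  model: config.model,
  max_tokens: 4096, // example value
  // Breakpoint 1: system prompt in array form so it can carry cache_control
  system: [{ type: 'text', text: systemPrompt, cache_control: { type: 'ephemeral' } }],
  // Breakpoint 2: marking the *last* tool caches the whole tools array as a unit
  tools: [
    // ...earlier tool definitions, unmarked...
    {
      name: 'example_tool', // hypothetical
      description: 'Last tool in the array',
      input_schema: { type: 'object', properties: {}, required: [] },
      cache_control: { type: 'ephemeral' },
    },
  ],
  messages: convertedMessages,
};
```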
+
+ Cache behaviour:
+ - On the first request (cold cache), tokens are processed normally and a cache entry is written.
+ - On subsequent requests within 1 hour, the cached prefix is reused — approximately 90% cost reduction and 85% latency reduction on the cached tokens.
+ - The cache TTL resets on each hit, so active conversations stay warm indefinitely as long as turns arrive within 1 hour of each other.
+ - If the tools array changes between turns (e.g. a new tool was saved), the cache is automatically invalidated because the content differs.
+
+ ### Auth
+
+ Two auth paths are supported, detected by key prefix (see the sketch after this list):
+
+ - `sk-ant-oat*` (OAuth token from `claude setup-token`): uses `authToken` (→ `Authorization: Bearer`) + `anthropic-beta: oauth-2025-04-20,extended-cache-ttl-2025-01-13`
+ - All other keys (standard API key): uses `apiKey` (→ `x-api-key`) + `anthropic-beta: extended-cache-ttl-2025-01-13`
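
A minimal sketch of that branch (the `authToken`, `apiKey`, and `defaultHeaders` constructor options are standard `@anthropic-ai/sdk` options; the helper name is hypothetical):

```js
import Anthropic from '@anthropic-ai/sdk';

// Sketch: pick Bearer-token auth for OAuth keys, x-api-key otherwise,
// and always request the extended 1-hour cache TTL beta.
function makeAnthropicClient(key) {
  const isOAuth = key.startsWith('sk-ant-oat');
  return new Anthropic({
    ...(isOAuth ? { authToken: key } : { apiKey: key }),
    defaultHeaders: {
      'anthropic-beta': isOAuth
        ? 'oauth-2025-04-20,extended-cache-ttl-2025-01-13'
        : 'extended-cache-ttl-2025-01-13',
    },
  });
}
```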
+
+ ### Cache Usage Tracking
+
+ The Anthropic API returns cache stats in the response `usage` object:
+
+ - `cache_read_input_tokens` — tokens served from cache (cheap)
+ - `cache_creation_input_tokens` — tokens written to cache (slightly more expensive than normal input)
+
+ These are accumulated alongside `prompt_tokens` and `completion_tokens` in `usageAccum` and persisted to `session.metadata.tokenUsage` as `cacheRead` and `cacheCreation`. The Telegram `/usage` command displays them when non-zero.
+
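
For orientation, a warm-cache turn might report usage like this (numbers invented):

```js
// Illustrative `usage` object from an Anthropic response on a cache hit:
const usage = {
  input_tokens: 212,               // uncached prompt tokens
  output_tokens: 341,
  cache_read_input_tokens: 14880,  // prefix served from cache at reduced cost
  cache_creation_input_tokens: 0,  // nothing newly written this turn
};
// After accumulation, /usage would display:
//   Cache read: 14,880
//   Cache written: 0
```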
  ## Logging
 
  We store a minimal, append-only JSONL log per session for human readability. Each line is one request/response cycle.
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@ducci/jarvis",
- "version": "1.0.36",
+ "version": "1.0.38",
  "description": "A fully automated agent system that lives on a server.",
  "main": "./src/index.js",
  "type": "module",
@@ -36,8 +36,13 @@ export async function startTelegramChannel(config) {
  }
 
  const total = u.prompt + u.completion;
+ const cacheRead = u.cacheRead || 0;
+ const cacheCreation = u.cacheCreation || 0;
+ const cacheLines = (cacheRead > 0 || cacheCreation > 0)
+   ? `\nCache read: ${cacheRead.toLocaleString()}\nCache written: ${cacheCreation.toLocaleString()}`
+   : '';
  await ctx.reply(
-   `Token usage for current session:\nIn: ${u.prompt.toLocaleString()}\nOut: ${u.completion.toLocaleString()}\nTotal: ${total.toLocaleString()}`
+   `Token usage for current session:\nIn: ${u.prompt.toLocaleString()}\nOut: ${u.completion.toLocaleString()}\nTotal: ${total.toLocaleString()}${cacheLines}`
  );
  });
 
@@ -37,6 +37,8 @@ function accumulateUsage(accum, result) {
  if (!u) return;
  accum.prompt += u.prompt_tokens || 0;
  accum.completion += u.completion_tokens || 0;
+ accum.cacheRead += u.cache_read_input_tokens || 0;
+ accum.cacheCreation += u.cache_creation_input_tokens || 0;
  }
 
  async function callModel(client, model, messages, tools) {
@@ -536,7 +538,7 @@ async function _runHandleChat(config, sessionId, userMessage) {
  }
 
  const allToolCalls = [];
- const usageAccum = { prompt: 0, completion: 0 };
+ const usageAccum = { prompt: 0, completion: 0, cacheRead: 0, cacheCreation: 0 };
  let finalResponse = '';
  let finalLogSummary = '';
  let finalStatus = 'ok';
@@ -719,9 +721,11 @@ async function _runHandleChat(config, sessionId, userMessage) {
  throw e;
  } finally {
  // Accumulate token usage into session metadata so /usage can read it
- if (!session.metadata.tokenUsage) session.metadata.tokenUsage = { prompt: 0, completion: 0 };
+ if (!session.metadata.tokenUsage) session.metadata.tokenUsage = { prompt: 0, completion: 0, cacheRead: 0, cacheCreation: 0 };
  session.metadata.tokenUsage.prompt += usageAccum.prompt;
  session.metadata.tokenUsage.completion += usageAccum.completion;
+ session.metadata.tokenUsage.cacheRead = (session.metadata.tokenUsage.cacheRead || 0) + usageAccum.cacheRead;
+ session.metadata.tokenUsage.cacheCreation = (session.metadata.tokenUsage.cacheCreation || 0) + usageAccum.cacheCreation;
 
  // Always persist the session — even if an unexpected error occurred.
  // A failed save must not mask the original error.
@@ -1,13 +1,15 @@
  import OpenAI from 'openai';
  import Anthropic from '@anthropic-ai/sdk';
 
- // Convert OpenAI tool definitions to Anthropic format
+ // Convert OpenAI tool definitions to Anthropic format.
+ // cache_control on the last tool caches everything up to and including it, i.e. the full tools array.
  function openAIToolsToAnthropic(tools) {
  if (!tools || tools.length === 0) return [];
- return tools.map(t => ({
+ return tools.map((t, i) => ({
  name: t.function.name,
  description: t.function.description || '',
  input_schema: t.function.parameters || { type: 'object', properties: {}, required: [] },
+ ...(i === tools.length - 1 ? { cache_control: { type: 'ephemeral' } } : {}),
  }));
  }
 
@@ -23,7 +25,8 @@ function openAIMessagesToAnthropic(messages) {
  let rest = messages;
 
  if (messages[0]?.role === 'system') {
- system = messages[0].content;
+ // Array form allows cache_control; Anthropic accepts string or array for system
+ system = [{ type: 'text', text: messages[0].content, cache_control: { type: 'ephemeral' } }];
  rest = messages.slice(1);
  }
 
@@ -106,6 +109,8 @@ function anthropicResponseToOpenAI(response) {
  prompt_tokens: response.usage?.input_tokens ?? 0,
  completion_tokens: response.usage?.output_tokens ?? 0,
  total_tokens: (response.usage?.input_tokens ?? 0) + (response.usage?.output_tokens ?? 0),
+ cache_read_input_tokens: response.usage?.cache_read_input_tokens ?? 0,
+ cache_creation_input_tokens: response.usage?.cache_creation_input_tokens ?? 0,
  },
  };
  }