@ducci/jarvis 1.0.36 → 1.0.38

package/docs/agent.md CHANGED
@@ -391,6 +391,51 @@ Internal flow summary:
  5. Call the model again with the updated conversation.
  6. Repeat until no tool calls are returned.
 
+ ## Anthropic Provider
+
+ When `config.provider === 'anthropic'`, Jarvis uses the Anthropic SDK directly instead of OpenRouter. `src/server/provider.js` exposes an adapter that converts the OpenAI-compatible interface used throughout the codebase into Anthropic's native API format.
+
+ ### Message Format Conversion
+
+ Key differences from OpenAI format (a simplified sketch follows the list):
+
+ - The `system` message is extracted from `messages[0]` and passed as a separate top-level `system` parameter (array form, to support `cache_control`).
+ - Assistant `tool_calls` are converted to `content` blocks of type `tool_use`.
+ - `role: "tool"` messages are grouped into `role: "user"` messages containing `tool_result` blocks.
+ - Consecutive `user` messages are merged (Anthropic requires strict user/assistant alternation).
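
A simplified sketch of these rules (illustrative only; the function name is hypothetical, and the real adapter in `src/server/provider.js` appears only in fragments further down this diff):

```js
// Sketch: OpenAI-style messages -> Anthropic messages, per the rules above.
function toAnthropicMessages(messages) {
  const out = [];
  for (const m of messages) {
    if (m.role === 'system') continue; // handled separately as the top-level `system` param
    if (m.role === 'tool') {
      // role:"tool" -> user message carrying a tool_result block
      out.push({
        role: 'user',
        content: [{ type: 'tool_result', tool_use_id: m.tool_call_id, content: m.content }],
      });
    } else if (m.role === 'assistant' && m.tool_calls) {
      // assistant tool_calls -> tool_use content blocks
      const blocks = m.content ? [{ type: 'text', text: m.content }] : [];
      for (const tc of m.tool_calls) {
        blocks.push({
          type: 'tool_use',
          id: tc.id,
          name: tc.function.name,
          input: JSON.parse(tc.function.arguments || '{}'),
        });
      }
      out.push({ role: 'assistant', content: blocks });
    } else {
      out.push({ role: m.role, content: [{ type: 'text', text: String(m.content ?? '') }] });
    }
    // merge consecutive user messages to satisfy strict alternation
    if (out.length > 1 && out.at(-1).role === 'user' && out.at(-2).role === 'user') {
      out.at(-2).content.push(...out.pop().content);
    }
  }
  return out;
}
```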
+
+ ### Prompt Caching
+
+ Jarvis enables Anthropic prompt caching to reduce cost and latency on repeated turns. The `extended-cache-ttl-2025-01-13` beta header is sent on every Anthropic request, upgrading the cache TTL from 5 minutes to **1 hour**.
+
+ Two cache breakpoints are set per request (sketched in the example after this list):
+
+ 1. **System prompt** — the full system prompt (with `{{user_info}}` already injected) is sent as an array with `cache_control: { type: "ephemeral" }`. This is the largest static block and benefits most from caching.
+
+ 2. **Tools array** — `cache_control: { type: "ephemeral" }` is added to the last tool definition. Anthropic caches everything up to and including the marked entry, so the entire tools array is cached as a unit.
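
Roughly, the request body ends up shaped like this (a sketch with invented values; `systemPrompt`, `convertedMessages`, and the tool entry stand in for the real ones):

```js
// Sketch: the two ephemeral cache breakpoints on an Anthropic request.
const request = {
  model: config.model,
  max_tokens: 4096, // example value
  // Breakpoint 1: system prompt in array form so it can carry cache_control
  system: [{ type: 'text', text: systemPrompt, cache_control: { type: 'ephemeral' } }],
  // Breakpoint 2: marking the *last* tool caches the whole tools array as a unit
  tools: [
    // ...earlier tool definitions, unmarked...
    {
      name: 'example_tool', // hypothetical
      description: 'Last tool in the array',
      input_schema: { type: 'object', properties: {}, required: [] },
      cache_control: { type: 'ephemeral' },
    },
  ],
  messages: convertedMessages,
};
```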
+
+ Cache behaviour:
+ - On the first request (cold cache), tokens are processed normally and a cache entry is written.
+ - On subsequent requests within 1 hour, the cached prefix is reused — approximately 90% cost reduction and 85% latency reduction on the cached tokens.
+ - The cache TTL resets on each hit, so active conversations stay warm indefinitely as long as turns arrive within 1 hour of each other.
+ - If the tools array changes between turns (e.g. a new tool was saved), the cache is automatically invalidated because the content differs.
+
+ ### Auth
+
+ Two auth paths are supported, detected by key prefix (see the sketch after this list):
+
+ - `sk-ant-oat*` (OAuth token from `claude setup-token`): uses `authToken` (→ `Authorization: Bearer`) + `anthropic-beta: oauth-2025-04-20,extended-cache-ttl-2025-01-13`
+ - All other keys (standard API key): uses `apiKey` (→ `x-api-key`) + `anthropic-beta: extended-cache-ttl-2025-01-13`
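
A minimal sketch of that branch (the `authToken`, `apiKey`, and `defaultHeaders` constructor options are standard `@anthropic-ai/sdk` options; the helper name is hypothetical):

```js
import Anthropic from '@anthropic-ai/sdk';

// Sketch: pick Bearer-token auth for OAuth keys, x-api-key otherwise,
// and always request the extended 1-hour cache TTL beta.
function makeAnthropicClient(key) {
  const isOAuth = key.startsWith('sk-ant-oat');
  return new Anthropic({
    ...(isOAuth ? { authToken: key } : { apiKey: key }),
    defaultHeaders: {
      'anthropic-beta': isOAuth
        ? 'oauth-2025-04-20,extended-cache-ttl-2025-01-13'
        : 'extended-cache-ttl-2025-01-13',
    },
  });
}
```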
+
+ ### Cache Usage Tracking
+
+ The Anthropic API returns cache stats in the response `usage` object:
+
+ - `cache_read_input_tokens` — tokens served from cache (cheap)
+ - `cache_creation_input_tokens` — tokens written to cache (slightly more expensive than normal input)
+
+ These are accumulated alongside `prompt_tokens` and `completion_tokens` in `usageAccum` and persisted to `session.metadata.tokenUsage` as `cacheRead` and `cacheCreation`. The Telegram `/usage` command displays them when non-zero.
+
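
For orientation, a warm-cache turn might report usage like this (numbers invented):

```js
// Illustrative `usage` object from an Anthropic response on a cache hit:
const usage = {
  input_tokens: 212,               // uncached prompt tokens
  output_tokens: 341,
  cache_read_input_tokens: 14880,  // prefix served from cache at reduced cost
  cache_creation_input_tokens: 0,  // nothing newly written this turn
};
// After accumulation, /usage would display:
//   Cache read: 14,880
//   Cache written: 0
```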
  ## Logging
 
  We store a minimal, append-only JSONL log per session for human readability. Each line is one request/response cycle.
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@ducci/jarvis",
- "version": "1.0.36",
+ "version": "1.0.38",
  "description": "A fully automated agent system that lives on a server.",
  "main": "./src/index.js",
  "type": "module",
@@ -36,8 +36,13 @@ export async function startTelegramChannel(config) {
  }
 
  const total = u.prompt + u.completion;
+ const cacheRead = u.cacheRead || 0;
+ const cacheCreation = u.cacheCreation || 0;
+ const cacheLines = (cacheRead > 0 || cacheCreation > 0)
+   ? `\nCache read: ${cacheRead.toLocaleString()}\nCache written: ${cacheCreation.toLocaleString()}`
+   : '';
  await ctx.reply(
-   `Token usage for current session:\nIn: ${u.prompt.toLocaleString()}\nOut: ${u.completion.toLocaleString()}\nTotal: ${total.toLocaleString()}`
+   `Token usage for current session:\nIn: ${u.prompt.toLocaleString()}\nOut: ${u.completion.toLocaleString()}\nTotal: ${total.toLocaleString()}${cacheLines}`
  );
  });
 
@@ -37,6 +37,8 @@ function accumulateUsage(accum, result) {
  if (!u) return;
  accum.prompt += u.prompt_tokens || 0;
  accum.completion += u.completion_tokens || 0;
+ accum.cacheRead += u.cache_read_input_tokens || 0;
+ accum.cacheCreation += u.cache_creation_input_tokens || 0;
  }
 
  async function callModel(client, model, messages, tools) {
@@ -536,7 +538,7 @@ async function _runHandleChat(config, sessionId, userMessage) {
  }
 
  const allToolCalls = [];
- const usageAccum = { prompt: 0, completion: 0 };
+ const usageAccum = { prompt: 0, completion: 0, cacheRead: 0, cacheCreation: 0 };
  let finalResponse = '';
  let finalLogSummary = '';
  let finalStatus = 'ok';
@@ -719,9 +721,11 @@ async function _runHandleChat(config, sessionId, userMessage) {
  throw e;
  } finally {
  // Accumulate token usage into session metadata so /usage can read it
- if (!session.metadata.tokenUsage) session.metadata.tokenUsage = { prompt: 0, completion: 0 };
+ if (!session.metadata.tokenUsage) session.metadata.tokenUsage = { prompt: 0, completion: 0, cacheRead: 0, cacheCreation: 0 };
  session.metadata.tokenUsage.prompt += usageAccum.prompt;
  session.metadata.tokenUsage.completion += usageAccum.completion;
+ session.metadata.tokenUsage.cacheRead = (session.metadata.tokenUsage.cacheRead || 0) + usageAccum.cacheRead;
+ session.metadata.tokenUsage.cacheCreation = (session.metadata.tokenUsage.cacheCreation || 0) + usageAccum.cacheCreation;
 
  // Always persist the session — even if an unexpected error occurred.
  // A failed save must not mask the original error.
@@ -1,13 +1,15 @@
  import OpenAI from 'openai';
  import Anthropic from '@anthropic-ai/sdk';
 
- // Convert OpenAI tool definitions to Anthropic format
+ // Convert OpenAI tool definitions to Anthropic format.
+ // cache_control on the last tool caches everything up to and including it, i.e. the full tools array.
  function openAIToolsToAnthropic(tools) {
  if (!tools || tools.length === 0) return [];
- return tools.map(t => ({
+ return tools.map((t, i) => ({
  name: t.function.name,
  description: t.function.description || '',
  input_schema: t.function.parameters || { type: 'object', properties: {}, required: [] },
+ ...(i === tools.length - 1 ? { cache_control: { type: 'ephemeral' } } : {}),
  }));
  }
 
@@ -23,7 +25,8 @@ function openAIMessagesToAnthropic(messages) {
  let rest = messages;
 
  if (messages[0]?.role === 'system') {
- system = messages[0].content;
+ // Array form allows cache_control; Anthropic accepts string or array for system
+ system = [{ type: 'text', text: messages[0].content, cache_control: { type: 'ephemeral' } }];
  rest = messages.slice(1);
  }
 
@@ -106,6 +109,8 @@ function anthropicResponseToOpenAI(response) {
  prompt_tokens: response.usage?.input_tokens ?? 0,
  completion_tokens: response.usage?.output_tokens ?? 0,
  total_tokens: (response.usage?.input_tokens ?? 0) + (response.usage?.output_tokens ?? 0),
+ cache_read_input_tokens: response.usage?.cache_read_input_tokens ?? 0,
+ cache_creation_input_tokens: response.usage?.cache_creation_input_tokens ?? 0,
  },
  };
  }