@ducci/jarvis 1.0.36 → 1.0.37
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/docs/agent.md +45 -0
- package/package.json +1 -1
- package/src/channels/telegram/index.js +6 -1
- package/src/server/agent.js +6 -2
- package/src/server/provider.js +10 -5
package/docs/agent.md
CHANGED
|
@@ -391,6 +391,51 @@ Internal flow summary:
|
|
|
391
391
|
5. Call the model again with the updated conversation.
|
|
392
392
|
6. Repeat until no tool calls are returned.
|
|
393
393
|
|
|
394
|
+
## Anthropic Provider
|
|
395
|
+
|
|
396
|
+
When `config.provider === 'anthropic'`, Jarvis uses the Anthropic SDK directly instead of OpenRouter. `src/server/provider.js` exposes an adapter that converts the OpenAI-compatible interface used throughout the codebase into Anthropic's native API format.
|
|
397
|
+
|
|
398
|
+
### Message Format Conversion
|
|
399
|
+
|
|
400
|
+
Key differences from OpenAI format:
|
|
401
|
+
|
|
402
|
+
- The `system` message is extracted from `messages[0]` and passed as a separate top-level `system` parameter (array form, to support `cache_control`).
|
|
403
|
+
- Assistant `tool_calls` are converted to `content` blocks of type `tool_use`.
|
|
404
|
+
- `role: "tool"` messages are grouped into `role: "user"` messages containing `tool_result` blocks.
|
|
405
|
+
- Consecutive `user` messages are merged (Anthropic requires strict user/assistant alternation).
|
|
406
|
+
|
|
407
|
+
### Prompt Caching
|
|
408
|
+
|
|
409
|
+
Jarvis enables Anthropic prompt caching to reduce cost and latency on repeated turns. The `extended-cache-ttl-2025-01-13` beta header is sent on every Anthropic request, upgrading the cache TTL from 5 minutes to **1 hour**.
|
|
410
|
+
|
|
411
|
+
Two cache breakpoints are set per request:
|
|
412
|
+
|
|
413
|
+
1. **System prompt** — the full system prompt (with `{{user_info}}` already injected) is sent as an array with `cache_control: { type: "ephemeral" }`. This is the largest static block and benefits most from caching.
|
|
414
|
+
|
|
415
|
+
2. **Tools array** — `cache_control: { type: "ephemeral" }` is added to the last tool definition. Anthropic caches everything up to and including the marked entry, so the entire tools array is cached as a unit.
|
|
416
|
+
|
|
417
|
+
Cache behaviour:
|
|
418
|
+
- On the first request (cold cache), tokens are processed normally and a cache entry is written.
|
|
419
|
+
- On subsequent requests within 1 hour, the cached prefix is reused — approximately 90% cost reduction and 85% latency reduction on the cached tokens.
|
|
420
|
+
- The cache TTL resets on each hit, so active conversations stay warm indefinitely as long as turns arrive within 1 hour of each other.
|
|
421
|
+
- If the tools array changes between turns (e.g. a new tool was saved), the cache is automatically invalidated because the content differs.
|
|
422
|
+
|
|
423
|
+
### Auth
|
|
424
|
+
|
|
425
|
+
Two auth paths are supported, detected by key prefix:
|
|
426
|
+
|
|
427
|
+
- `sk-ant-oat*` (OAuth token from `claude setup-token`): uses `authToken` (→ `Authorization: Bearer`) + `anthropic-beta: oauth-2025-04-20,extended-cache-ttl-2025-01-13`
|
|
428
|
+
- All other keys (standard API key): uses `apiKey` (→ `x-api-key`) + `anthropic-beta: extended-cache-ttl-2025-01-13`
|
|
429
|
+
|
|
430
|
+
### Cache Usage Tracking
|
|
431
|
+
|
|
432
|
+
The Anthropic API returns cache stats in the response `usage` object:
|
|
433
|
+
|
|
434
|
+
- `cache_read_input_tokens` — tokens served from cache (cheap)
|
|
435
|
+
- `cache_creation_input_tokens` — tokens written to cache (slightly more expensive than normal input)
|
|
436
|
+
|
|
437
|
+
These are accumulated alongside `prompt_tokens` and `completion_tokens` in `usageAccum` and persisted to `session.metadata.tokenUsage` as `cacheRead` and `cacheCreation`. The Telegram `/usage` command displays them when non-zero.
|
|
438
|
+
|
|
394
439
|
## Logging
|
|
395
440
|
|
|
396
441
|
We store a minimal, append-only JSONL log per session for human readability. Each line is one request/response cycle.
|
package/src/channels/telegram/index.js
CHANGED
|
@@ -36,8 +36,13 @@ export async function startTelegramChannel(config) {
|
|
|
36
36
|
}
|
|
37
37
|
|
|
38
38
|
const total = u.prompt + u.completion;
|
|
39
|
+
const cacheRead = u.cacheRead || 0;
|
|
40
|
+
const cacheCreation = u.cacheCreation || 0;
|
|
41
|
+
const cacheLines = (cacheRead > 0 || cacheCreation > 0)
|
|
42
|
+
? `\nCache read: ${cacheRead.toLocaleString()}\nCache written: ${cacheCreation.toLocaleString()}`
|
|
43
|
+
: '';
|
|
39
44
|
await ctx.reply(
|
|
40
|
-
`Token usage for current session:\nIn: ${u.prompt.toLocaleString()}\nOut: ${u.completion.toLocaleString()}\nTotal: ${total.toLocaleString()}`
|
|
45
|
+
`Token usage for current session:\nIn: ${u.prompt.toLocaleString()}\nOut: ${u.completion.toLocaleString()}\nTotal: ${total.toLocaleString()}${cacheLines}`
|
|
41
46
|
);
|
|
42
47
|
});
|
|
43
48
|
|
package/src/server/agent.js
CHANGED
|
@@ -37,6 +37,8 @@ function accumulateUsage(accum, result) {
|
|
|
37
37
|
if (!u) return;
|
|
38
38
|
accum.prompt += u.prompt_tokens || 0;
|
|
39
39
|
accum.completion += u.completion_tokens || 0;
|
|
40
|
+
accum.cacheRead += u.cache_read_input_tokens || 0;
|
|
41
|
+
accum.cacheCreation += u.cache_creation_input_tokens || 0;
|
|
40
42
|
}
|
|
41
43
|
|
|
42
44
|
async function callModel(client, model, messages, tools) {
|
|
@@ -536,7 +538,7 @@ async function _runHandleChat(config, sessionId, userMessage) {
|
|
|
536
538
|
}
|
|
537
539
|
|
|
538
540
|
const allToolCalls = [];
|
|
539
|
-
const usageAccum = { prompt: 0, completion: 0 };
|
|
541
|
+
const usageAccum = { prompt: 0, completion: 0, cacheRead: 0, cacheCreation: 0 };
|
|
540
542
|
let finalResponse = '';
|
|
541
543
|
let finalLogSummary = '';
|
|
542
544
|
let finalStatus = 'ok';
|
|
@@ -719,9 +721,11 @@ async function _runHandleChat(config, sessionId, userMessage) {
|
|
|
719
721
|
throw e;
|
|
720
722
|
} finally {
|
|
721
723
|
// Accumulate token usage into session metadata so /usage can read it
|
|
722
|
-
if (!session.metadata.tokenUsage) session.metadata.tokenUsage = { prompt: 0, completion: 0 };
|
|
724
|
+
if (!session.metadata.tokenUsage) session.metadata.tokenUsage = { prompt: 0, completion: 0, cacheRead: 0, cacheCreation: 0 };
|
|
723
725
|
session.metadata.tokenUsage.prompt += usageAccum.prompt;
|
|
724
726
|
session.metadata.tokenUsage.completion += usageAccum.completion;
|
|
727
|
+
session.metadata.tokenUsage.cacheRead = (session.metadata.tokenUsage.cacheRead || 0) + usageAccum.cacheRead;
|
|
728
|
+
session.metadata.tokenUsage.cacheCreation = (session.metadata.tokenUsage.cacheCreation || 0) + usageAccum.cacheCreation;
|
|
725
729
|
|
|
726
730
|
// Always persist the session — even if an unexpected error occurred.
|
|
727
731
|
// A failed save must not mask the original error.
|
package/src/server/provider.js
CHANGED
|
@@ -1,13 +1,15 @@
|
|
|
1
1
|
import OpenAI from 'openai';
|
|
2
2
|
import Anthropic from '@anthropic-ai/sdk';
|
|
3
3
|
|
|
4
|
-
// Convert OpenAI tool definitions to Anthropic format
|
|
4
|
+
// Convert OpenAI tool definitions to Anthropic format.
|
|
5
|
+
// Cache_control on the last tool caches everything up to and including the full tools array.
|
|
5
6
|
function openAIToolsToAnthropic(tools) {
|
|
6
7
|
if (!tools || tools.length === 0) return [];
|
|
7
|
-
return tools.map(t => ({
|
|
8
|
+
return tools.map((t, i) => ({
|
|
8
9
|
name: t.function.name,
|
|
9
10
|
description: t.function.description || '',
|
|
10
11
|
input_schema: t.function.parameters || { type: 'object', properties: {}, required: [] },
|
|
12
|
+
...(i === tools.length - 1 ? { cache_control: { type: 'ephemeral' } } : {}),
|
|
11
13
|
}));
|
|
12
14
|
}
|
|
13
15
|
|
|
@@ -23,7 +25,8 @@ function openAIMessagesToAnthropic(messages) {
|
|
|
23
25
|
let rest = messages;
|
|
24
26
|
|
|
25
27
|
if (messages[0]?.role === 'system') {
|
|
26
|
-
|
|
28
|
+
// Array form allows cache_control; Anthropic accepts string or array for system
|
|
29
|
+
system = [{ type: 'text', text: messages[0].content, cache_control: { type: 'ephemeral' } }];
|
|
27
30
|
rest = messages.slice(1);
|
|
28
31
|
}
|
|
29
32
|
|
|
@@ -106,6 +109,8 @@ function anthropicResponseToOpenAI(response) {
|
|
|
106
109
|
prompt_tokens: response.usage?.input_tokens ?? 0,
|
|
107
110
|
completion_tokens: response.usage?.output_tokens ?? 0,
|
|
108
111
|
total_tokens: (response.usage?.input_tokens ?? 0) + (response.usage?.output_tokens ?? 0),
|
|
112
|
+
cache_read_input_tokens: response.usage?.cache_read_input_tokens ?? 0,
|
|
113
|
+
cache_creation_input_tokens: response.usage?.cache_creation_input_tokens ?? 0,
|
|
109
114
|
},
|
|
110
115
|
};
|
|
111
116
|
}
|
|
@@ -114,8 +119,8 @@ function anthropicResponseToOpenAI(response) {
|
|
|
114
119
|
function createAnthropicClient(apiKey) {
|
|
115
120
|
const isOAuthToken = apiKey.startsWith('sk-ant-oat');
|
|
116
121
|
const anthropic = isOAuthToken
|
|
117
|
-
? new Anthropic({ authToken: apiKey, defaultHeaders: { 'anthropic-beta': 'oauth-2025-04-20' } })
|
|
118
|
-
: new Anthropic({ apiKey });
|
|
122
|
+
? new Anthropic({ authToken: apiKey, defaultHeaders: { 'anthropic-beta': 'oauth-2025-04-20,extended-cache-ttl-2025-01-13' } })
|
|
123
|
+
: new Anthropic({ apiKey, defaultHeaders: { 'anthropic-beta': 'extended-cache-ttl-2025-01-13' } });
|
|
119
124
|
|
|
120
125
|
return {
|
|
121
126
|
chat: {
|