@genesislcap/foundation-ai 14.438.0 → 14.439.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/dts/ai-provider.d.ts +9 -3
- package/dist/dts/ai-provider.d.ts.map +1 -1
- package/dist/dts/index.d.ts +5 -2
- package/dist/dts/index.d.ts.map +1 -1
- package/dist/dts/providers/anthropic-provider.d.ts +26 -0
- package/dist/dts/providers/anthropic-provider.d.ts.map +1 -0
- package/dist/dts/transports/anthropic-transport.d.ts +93 -0
- package/dist/dts/transports/anthropic-transport.d.ts.map +1 -0
- package/dist/dts/transports/gemini-transport.d.ts +19 -2
- package/dist/dts/transports/gemini-transport.d.ts.map +1 -1
- package/dist/dts/transports/server-openai-transport.d.ts +1 -0
- package/dist/dts/transports/server-openai-transport.d.ts.map +1 -1
- package/dist/dts/types/chat.types.d.ts +25 -0
- package/dist/dts/types/chat.types.d.ts.map +1 -1
- package/dist/dts/types/config.types.d.ts +33 -2
- package/dist/dts/types/config.types.d.ts.map +1 -1
- package/dist/dts/types/status.types.d.ts +7 -0
- package/dist/dts/types/status.types.d.ts.map +1 -1
- package/dist/dts/types/transports.types.d.ts +22 -0
- package/dist/dts/types/transports.types.d.ts.map +1 -1
- package/dist/esm/ai-provider.js +24 -1
- package/dist/esm/index.js +3 -1
- package/dist/esm/providers/anthropic-provider.js +64 -0
- package/dist/esm/providers/gemini-provider.js +1 -1
- package/dist/esm/providers/openai-provider.js +2 -2
- package/dist/esm/transports/anthropic-transport.js +367 -0
- package/dist/esm/transports/gemini-transport.js +50 -3
- package/dist/esm/transports/server-openai-transport.js +16 -1
- package/dist/esm/types/config.types.js +6 -0
- package/dist/foundation-ai.api.json +1351 -7
- package/dist/foundation-ai.d.ts +230 -6
- package/package.json +11 -11
|
@@ -0,0 +1,367 @@
|
|
|
1
|
+
import { __awaiter } from "tslib";
|
|
2
|
+
import { SUPPORTED_ANTHROPIC_MODEL_IDS, } from '../types';
|
|
3
|
+
import { logger } from '../utils/logger';
|
|
4
|
+
/** Endpoint of the Anthropic Messages API, used when the transport holds an API key. */
const ANTHROPIC_DIRECT_URL = 'https://api.anthropic.com/v1/messages';
/** Value sent in the `anthropic-version` request header on direct calls. */
const ANTHROPIC_API_VERSION = '2023-06-01';
/** Model used when the caller's config does not name one. */
const DEFAULT_MODEL = 'claude-haiku-4-5-20251001';
/** Per-attempt request timeout in milliseconds (3 minutes). */
const DEFAULT_TIMEOUT = 3 * 60 * 1000;
/** Value sent as `max_tokens` when the caller's config does not set one. */
const DEFAULT_MAX_TOKENS = 4096;
/**
 * Maximum input context window (tokens) per supported Claude model.
 * Source: https://docs.claude.com/en/docs/about-claude/models/overview
 *
 * Frozen: the table is shared by every transport instance, so it must not be
 * mutable through any reference.
 */
const ANTHROPIC_CONTEXT_LIMITS = Object.freeze({
    'claude-opus-4-7': 1000000,
    'claude-sonnet-4-6': 1000000,
    'claude-haiku-4-5-20251001': 200000,
});
/**
 * Tool name used to coerce JSON-schema output from the model when the caller
 * provided a `responseSchema`. The transport forces `tool_choice` to this tool
 * and surfaces the `input` field as the structured response.
 */
const STRUCTURED_OUTPUT_TOOL_NAME = 'emit_structured_response';
|
|
24
|
+
/**
 * Guard that rejects model identifiers the transport does not support.
 *
 * @param model - model identifier to validate.
 * @throws Error listing the supported identifiers when `model` is not among
 * `SUPPORTED_ANTHROPIC_MODEL_IDS`.
 */
function assertSupportedAnthropicModel(model) {
    const isSupported = SUPPORTED_ANTHROPIC_MODEL_IDS.includes(model);
    if (isSupported) {
        return;
    }
    const allowed = SUPPORTED_ANTHROPIC_MODEL_IDS.join(', ');
    throw new Error(`AnthropicTransport: unsupported model "${model}". Use one of: ${allowed}.`);
}
|
|
29
|
+
/**
 * Standard tier pricing per million tokens — https://docs.claude.com/en/docs/about-claude/pricing
 *
 * @param model - Claude model identifier.
 * @returns a fresh `{ promptPerMillion, candidatePerMillion }` object holding
 * the USD rates per million tokens. Any identifier other than the Haiku and
 * Sonnet ones handled below receives the Opus 4.7 rates.
 */
function estimatedAnthropicRatesUsdPerMillion(model) {
    switch (model) {
        case 'claude-haiku-4-5-20251001':
            return { promptPerMillion: 1, candidatePerMillion: 5 };
        case 'claude-sonnet-4-6':
            return { promptPerMillion: 3, candidatePerMillion: 15 };
        default:
            // Opus 4.7
            return { promptPerMillion: 5, candidatePerMillion: 25 };
    }
}
|
|
42
|
+
/**
 * Transport for Anthropic Claude. Calls the Messages API directly when `apiKey`
 * is provided, otherwise falls back to a server-proxy endpoint (if `serverEndpoint`
 * is configured).
 *
 * Implements `AITransport` (structured prompt via tool-forcing) and `ChatTransport`
 * (multi-turn chat).
 *
 * @beta
 */
export class AnthropicTransport {
    /**
     * @param config - optional settings; reads `model`, `timeout`, `apiKey`,
     * `serverEndpoint` and `maxTokens`. Missing `model`, `timeout` and
     * `maxTokens` fall back to the module defaults.
     * @throws Error when the resolved model is not a supported Anthropic model.
     */
    constructor(config = {}) {
        /**
         * Estimated USD cost accumulated across every successful request on this
         * transport instance. Convenience accessor for non-chat consumers
         * (telemetry, debug overlays). The main chat UI sums per-message `cost`
         * fields instead so its session total stays attributed to chat turns only.
         */
        this.lifetimeCostUsd = 0;
        const model = config.model != null ? config.model : DEFAULT_MODEL;
        assertSupportedAnthropicModel(model);
        this.model = model;
        // Surface a cost warning for the pricier tiers so the choice is deliberate.
        if (model === 'claude-sonnet-4-6') {
            logger.warn('AnthropicTransport: using claude-sonnet-4-6 — higher cost than Haiku; use for stronger reasoning or agent tasks.');
        }
        else if (model === 'claude-opus-4-7') {
            logger.warn('AnthropicTransport: using claude-opus-4-7 — significantly higher cost; reserve for tasks where Sonnet reliability is insufficient.');
        }
        this.timeout = config.timeout != null ? config.timeout : DEFAULT_TIMEOUT;
        this.apiKey = config.apiKey;
        this.serverEndpoint = config.serverEndpoint;
        this.maxTokens = config.maxTokens != null ? config.maxTokens : DEFAULT_MAX_TOKENS;
    }
    /**
     * @returns the provider name, the active model and that model's context
     * limit (`undefined` when the model has no entry in the limits table).
     */
    getConfig() {
        return {
            provider: 'anthropic',
            model: this.model,
            contextLimit: ANTHROPIC_CONTEXT_LIMITS[this.model],
        };
    }
    /** Estimated USD cost accumulated across every successful request on this transport instance. */
    getLifetimeCost() {
        return this.lifetimeCostUsd;
    }
    /** Reset the lifetime cost counter. Intended for chat-clear / new-session flows. */
    resetLifetimeCost() {
        this.lifetimeCostUsd = 0;
    }
    // ── AITransport (structured prompt) ────────────────────────────────────
    /**
     * Send a single-turn prompt and return the model output as a string.
     *
     * @param options - `{ systemPrompt, userPrompt, responseSchema }`.
     * @returns when `responseSchema` is given, the JSON-stringified `input` of
     * the forced tool call (or `''` if the model emitted no such call);
     * otherwise the concatenated text blocks of the response.
     */
    sendStructuredPrompt(options) {
        return __awaiter(this, void 0, void 0, function* () {
            const { systemPrompt, userPrompt, responseSchema } = options;
            const body = {
                model: this.model,
                max_tokens: this.maxTokens,
                messages: [{ role: 'user', content: userPrompt }],
            };
            if (systemPrompt)
                body.system = systemPrompt;
            // Anthropic has no native JSON-schema response format. The supported pattern
            // is to define a tool whose input_schema is the desired schema, then force
            // the model to call it via tool_choice. The tool's `input` is the structured
            // payload we surface back to the caller as a JSON string.
            if (responseSchema) {
                body.tools = [
                    {
                        name: STRUCTURED_OUTPUT_TOOL_NAME,
                        description: 'Emit the structured response that matches the required schema.',
                        input_schema: responseSchema,
                    },
                ];
                body.tool_choice = { type: 'tool', name: STRUCTURED_OUTPUT_TOOL_NAME };
            }
            const response = yield this.post(body);
            // Structured prompts are billed too; record them so the lifetime total
            // really covers every successful request made through this instance.
            if (response.usage) {
                const promptTokens = response.usage.input_tokens != null ? response.usage.input_tokens : 0;
                const candidateTokens = response.usage.output_tokens != null ? response.usage.output_tokens : 0;
                this.logTokenUsage(promptTokens, candidateTokens);
            }
            const blocks = response.content != null ? response.content : [];
            if (responseSchema) {
                const toolUse = blocks.find((b) => b.type === 'tool_use' && b.name === STRUCTURED_OUTPUT_TOOL_NAME);
                if (!toolUse) {
                    return '';
                }
                return JSON.stringify(toolUse.input != null ? toolUse.input : {});
            }
            return blocks
                .filter((b) => b.type === 'text')
                .map((b) => b.text)
                .join('');
        });
    }
    // ── ChatTransport (multi-turn chat) ────────────────────────────────────
    /**
     * Send one chat turn.
     *
     * @param history - prior conversation messages.
     * @param userMessage - text of the new user turn (may be empty).
     * @param options - optional `{ attachments, systemPrompt, tools, signal }`.
     * @returns the assistant message built by `fromAnthropicResponse`.
     */
    sendChatMessage(history, userMessage, options) {
        return __awaiter(this, void 0, void 0, function* () {
            const opts = options != null ? options : {};
            const messages = this.toAnthropicMessages(history, userMessage, opts.attachments);
            const body = {
                model: this.model,
                max_tokens: this.maxTokens,
                messages,
            };
            if (opts.systemPrompt)
                body.system = opts.systemPrompt;
            if (opts.tools != null && opts.tools.length) {
                body.tools = opts.tools.map((t) => ({
                    name: t.name,
                    description: t.description,
                    input_schema: t.parameters,
                }));
            }
            const response = yield this.post(body, opts.signal);
            return this.fromAnthropicResponse(response);
        });
    }
    /**
     * Logs the per-call cost breakdown, accumulates the lifetime running total,
     * and returns the per-call total so the caller can attach it to the response
     * message.
     *
     * @param promptTokens - input token count reported for the call.
     * @param candidateTokens - output token count reported for the call.
     * @returns estimated USD cost of this call.
     */
    logTokenUsage(promptTokens, candidateTokens) {
        const { promptPerMillion, candidatePerMillion } = estimatedAnthropicRatesUsdPerMillion(this.model);
        const promptCost = (promptTokens / AnthropicTransport.TOKENS_PER_MILLION) * promptPerMillion;
        const candidateCost = (candidateTokens / AnthropicTransport.TOKENS_PER_MILLION) * candidatePerMillion;
        const totalCost = promptCost + candidateCost;
        this.lifetimeCostUsd += totalCost;
        console.log(`--- Anthropic Token Usage (${this.model}) ---`);
        console.log(`Prompt Tokens: ${promptTokens} ($${promptCost.toFixed(AnthropicTransport.COST_DECIMAL_PLACES)})`);
        console.log(`Candidate Tokens: ${candidateTokens} ($${candidateCost.toFixed(AnthropicTransport.COST_DECIMAL_PLACES)})`);
        console.log(`Total Cost: $${totalCost.toFixed(AnthropicTransport.COST_DECIMAL_PLACES)}`);
        console.log(`Lifetime Cost: $${this.lifetimeCostUsd.toFixed(AnthropicTransport.COST_DECIMAL_PLACES)}`);
        console.log('--------------------------');
        return totalCost;
    }
    /**
     * Convert the internal `ChatMessage[]` history into Anthropic's message format.
     *
     * Anthropic alternates strictly `user` / `assistant`. Tool calls are emitted by
     * the assistant as `tool_use` blocks; the corresponding `tool_result` blocks
     * must appear in the *next* user message and reference the tool by `tool_use_id`.
     * Consecutive same-role turns are merged by the API but we merge here to keep
     * the payload tidy.
     *
     * @param history - prior messages; `system`, `system-event` and thinking
     * entries are skipped.
     * @param userMessage - text of the new user turn; skipped when empty.
     * @param attachments - optional files for the new turn, each rendered as a
     * `[File: name]` text block.
     * @returns the messages array for the request body.
     */
    toAnthropicMessages(history, userMessage, attachments) {
        const messages = [];
        // Append to the previous message when the role repeats, otherwise open a new one.
        const pushBlock = (role, block) => {
            const last = messages[messages.length - 1];
            if (last && last.role === role && Array.isArray(last.content)) {
                last.content.push(block);
                return;
            }
            messages.push({ role, content: [block] });
        };
        const fileBlock = (att) => ({ type: 'text', text: `[File: ${att.name}]\n${att.content}` });
        for (const msg of history) {
            if (msg.role === 'system' || msg.role === 'system-event' || msg.thinking)
                continue;
            if (msg.toolResult) {
                pushBlock('user', {
                    type: 'tool_result',
                    tool_use_id: msg.toolResult.toolCallId,
                    content: msg.toolResult.content,
                });
                continue;
            }
            if (msg.toolCalls && msg.toolCalls.length) {
                if (msg.content) {
                    pushBlock('assistant', { type: 'text', text: msg.content });
                }
                for (const tc of msg.toolCalls) {
                    pushBlock('assistant', {
                        type: 'tool_use',
                        id: tc.id,
                        name: tc.name,
                        input: tc.args != null ? tc.args : {},
                    });
                }
                continue;
            }
            const role = msg.role === 'user' ? 'user' : 'assistant';
            // Never emit an empty text block: an attachment-only turn carries no
            // message text, so only the attachment blocks are sent for it.
            if (msg.content) {
                pushBlock(role, { type: 'text', text: msg.content });
            }
            if (role === 'user' && msg.attachments && msg.attachments.length) {
                for (const att of msg.attachments) {
                    pushBlock(role, fileBlock(att));
                }
            }
        }
        if (userMessage) {
            pushBlock('user', { type: 'text', text: userMessage });
        }
        for (const att of attachments != null ? attachments : []) {
            pushBlock('user', fileBlock(att));
        }
        return messages;
    }
    /**
     * Convert a Messages API response into the internal assistant message.
     *
     * @param response - parsed response body; reads `usage` and `content`.
     * @returns `{ role: 'assistant', content }` plus `toolCalls` when the model
     * requested tools, and `inputTokens` / `outputTokens` / `cost` when usage
     * was reported. With tool calls present, `content` prefers the thinking
     * text and falls back to the plain text.
     */
    fromAnthropicResponse(response) {
        let inputTokens;
        let outputTokens;
        let cost;
        const usage = response.usage;
        if (usage) {
            cost = this.logTokenUsage(usage.input_tokens != null ? usage.input_tokens : 0, usage.output_tokens != null ? usage.output_tokens : 0);
            if (usage.input_tokens != null) {
                inputTokens = usage.input_tokens;
            }
            if (usage.output_tokens != null) {
                outputTokens = usage.output_tokens;
            }
        }
        const toolCalls = [];
        const thoughtParts = [];
        const textParts = [];
        for (const block of response.content != null ? response.content : []) {
            if (block.type === 'tool_use') {
                toolCalls.push({
                    id: block.id,
                    name: block.name,
                    args: block.input != null ? block.input : {},
                });
            }
            else if (block.type === 'thinking') {
                thoughtParts.push(block.thinking);
            }
            else if (block.type === 'text') {
                textParts.push(block.text);
            }
        }
        const base = toolCalls.length > 0
            ? {
                role: 'assistant',
                content: thoughtParts.join('') || textParts.join(''),
                toolCalls,
            }
            : { role: 'assistant', content: textParts.join('') };
        if (inputTokens != null)
            base.inputTokens = inputTokens;
        if (outputTokens != null)
            base.outputTokens = outputTokens;
        if (cost != null)
            base.cost = cost;
        return base;
    }
    /**
     * Resolve where and how to send the request.
     *
     * @returns `{ url, headers, credentials }` for the direct API when an API
     * key is set, otherwise for the configured server proxy.
     * @throws Error when neither an API key nor a server endpoint is configured.
     */
    buildEndpoint() {
        if (this.apiKey) {
            return {
                url: ANTHROPIC_DIRECT_URL,
                headers: {
                    'Content-Type': 'application/json',
                    'x-api-key': this.apiKey,
                    'anthropic-version': ANTHROPIC_API_VERSION,
                    // Anthropic blocks browser-origin requests by default. This header is the
                    // documented opt-in (equivalent to the SDK's `dangerouslyAllowBrowser`).
                    'anthropic-dangerous-direct-browser-access': 'true',
                },
                credentials: 'omit',
            };
        }
        if (this.serverEndpoint) {
            return {
                url: this.serverEndpoint,
                headers: { 'Content-Type': 'application/json' },
                credentials: 'include',
            };
        }
        throw new Error('AnthropicTransport: no API key or server endpoint configured. ' +
            'Set ANTHROPIC_API_KEY in your .env or pass apiKey/serverEndpoint in AnthropicAIConfig.');
    }
    /**
     * POST `body` as JSON and return the parsed response, retrying retryable
     * statuses with exponential backoff.
     *
     * Each attempt is aborted after `this.timeout` milliseconds. A caller-supplied
     * `signal` is honoured in addition to that timeout, not instead of it.
     *
     * @param body - request payload, serialised with `JSON.stringify`.
     * @param signal - optional `AbortSignal` that cancels the in-flight attempt.
     * @returns the parsed JSON response body.
     * @throws Error on a non-OK response; rethrows whatever `fetch` rejects
     * with (including aborts).
     */
    post(body, signal) {
        return __awaiter(this, void 0, void 0, function* () {
            const { url, headers, credentials } = this.buildEndpoint();
            const payload = JSON.stringify(body);
            // Sequential retries are intentional — each attempt depends on the previous response.
            /* eslint-disable no-await-in-loop */
            for (let attempt = 0; attempt <= AnthropicTransport.MAX_RETRIES; attempt += 1) {
                const controller = new AbortController();
                // fetch accepts a single signal, so the caller's abort is forwarded into
                // the per-attempt controller that also carries the timeout.
                const onExternalAbort = () => controller.abort(signal.reason);
                if (signal) {
                    if (signal.aborted) {
                        onExternalAbort();
                    }
                    else {
                        signal.addEventListener('abort', onExternalAbort, { once: true });
                    }
                }
                const timeoutId = setTimeout(() => controller.abort(), this.timeout);
                let response;
                try {
                    response = yield fetch(url, {
                        method: 'POST',
                        headers,
                        body: payload,
                        signal: controller.signal,
                        credentials,
                    });
                }
                finally {
                    clearTimeout(timeoutId);
                    if (signal) {
                        signal.removeEventListener('abort', onExternalAbort);
                    }
                }
                const isRetryable = AnthropicTransport.RETRYABLE_STATUSES.includes(response.status);
                if (isRetryable && attempt < AnthropicTransport.MAX_RETRIES) {
                    // Exponential backoff: 1s, 2s, 4s, 8s, 16s
                    const delayMs = AnthropicTransport.BACKOFF_BASE_MS * (1 << attempt);
                    yield new Promise((resolve) => {
                        setTimeout(resolve, delayMs);
                    });
                    continue;
                }
                if (!response.ok) {
                    const err = yield response.text();
                    throw new Error(`Anthropic request error ${response.status}: ${err}`);
                }
                return (yield response.json());
            }
            /* eslint-enable no-await-in-loop */
            throw new Error('Anthropic request failed: max retries exceeded');
        });
    }
}
// ── Private helpers ────────────────────────────────────────────────────
// Static configuration shared by all instances.
AnthropicTransport.TOKENS_PER_MILLION = 1000000;
AnthropicTransport.COST_DECIMAL_PLACES = 6;
AnthropicTransport.MAX_RETRIES = 5;
AnthropicTransport.RATE_LIMIT_STATUS = 429;
AnthropicTransport.SERVICE_UNAVAILABLE_STATUS = 503;
AnthropicTransport.OVERLOADED_STATUS = 529;
AnthropicTransport.RETRYABLE_STATUSES = [
    AnthropicTransport.RATE_LIMIT_STATUS,
    AnthropicTransport.SERVICE_UNAVAILABLE_STATUS,
    AnthropicTransport.OVERLOADED_STATUS,
];
AnthropicTransport.BACKOFF_BASE_MS = 1000;
|
|
@@ -4,6 +4,15 @@ import { logger } from '../utils/logger';
|
|
|
4
4
|
const GEMINI_DIRECT_URL = (model) => `https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent`;
|
|
5
5
|
const DEFAULT_MODEL = 'gemini-2.5-flash-lite';
|
|
6
6
|
const DEFAULT_TIMEOUT = 180000; // 3 minutes
|
|
7
|
+
/**
|
|
8
|
+
* Maximum input context window (tokens) per supported Gemini model.
|
|
9
|
+
* Source: https://ai.google.dev/gemini-api/docs/models
|
|
10
|
+
*/
|
|
11
|
+
const GEMINI_CONTEXT_LIMITS = {
|
|
12
|
+
'gemini-2.5-pro': 1048576,
|
|
13
|
+
'gemini-2.5-flash': 1048576,
|
|
14
|
+
'gemini-2.5-flash-lite': 1048576,
|
|
15
|
+
};
|
|
7
16
|
function assertSupportedGeminiModel(model) {
|
|
8
17
|
if (!SUPPORTED_GEMINI_MODEL_IDS.includes(model)) {
|
|
9
18
|
throw new Error(`GeminiTransport: unsupported model "${model}". Use one of: ${SUPPORTED_GEMINI_MODEL_IDS.join(', ')}.`);
|
|
@@ -47,6 +56,13 @@ export class MalformedFunctionCallError extends Error {
|
|
|
47
56
|
export class GeminiTransport {
|
|
48
57
|
constructor(config = {}) {
|
|
49
58
|
var _a, _b;
|
|
59
|
+
/**
|
|
60
|
+
* Estimated USD cost accumulated across every successful request on this
|
|
61
|
+
* transport instance. Convenience accessor for non-chat consumers
|
|
62
|
+
* (telemetry, debug overlays). The main chat UI sums per-message `cost`
|
|
63
|
+
* fields instead so its session total stays attributed to chat turns only.
|
|
64
|
+
*/
|
|
65
|
+
this.lifetimeCostUsd = 0;
|
|
50
66
|
const model = (_a = config.model) !== null && _a !== void 0 ? _a : DEFAULT_MODEL;
|
|
51
67
|
assertSupportedGeminiModel(model);
|
|
52
68
|
this.model = model;
|
|
@@ -61,7 +77,19 @@ export class GeminiTransport {
|
|
|
61
77
|
this.serverEndpoint = config.serverEndpoint;
|
|
62
78
|
}
|
|
63
79
|
getConfig() {
|
|
64
|
-
return {
|
|
80
|
+
return {
|
|
81
|
+
provider: 'gemini',
|
|
82
|
+
model: this.model,
|
|
83
|
+
contextLimit: GEMINI_CONTEXT_LIMITS[this.model],
|
|
84
|
+
};
|
|
85
|
+
}
|
|
86
|
+
/** Estimated USD cost accumulated across every successful request on this transport instance. */
|
|
87
|
+
getLifetimeCost() {
|
|
88
|
+
return this.lifetimeCostUsd;
|
|
89
|
+
}
|
|
90
|
+
/** Reset the lifetime cost counter. Intended for chat-clear / new-session flows. */
|
|
91
|
+
resetLifetimeCost() {
|
|
92
|
+
this.lifetimeCostUsd = 0;
|
|
65
93
|
}
|
|
66
94
|
// ── AITransport (structured prompt) ────────────────────────────────────
|
|
67
95
|
sendStructuredPrompt(options) {
|
|
@@ -101,16 +129,24 @@ export class GeminiTransport {
|
|
|
101
129
|
return this.fromGeminiResponse(response);
|
|
102
130
|
});
|
|
103
131
|
}
|
|
132
|
+
/**
|
|
133
|
+
* Logs the per-call cost breakdown, accumulates the lifetime running total,
|
|
134
|
+
* and returns the per-call total so the caller can attach it to the response
|
|
135
|
+
* message.
|
|
136
|
+
*/
|
|
104
137
|
logTokenUsage(promptTokens, candidateTokens) {
|
|
105
138
|
const { promptPerMillion, candidatePerMillion } = estimatedGeminiPaidRatesUsdPerMillion(this.model);
|
|
106
139
|
const promptCost = (promptTokens / GeminiTransport.TOKENS_PER_MILLION) * promptPerMillion;
|
|
107
140
|
const candidateCost = (candidateTokens / GeminiTransport.TOKENS_PER_MILLION) * candidatePerMillion;
|
|
108
141
|
const totalCost = promptCost + candidateCost;
|
|
142
|
+
this.lifetimeCostUsd += totalCost;
|
|
109
143
|
console.log(`--- Gemini Token Usage (${this.model}) ---`);
|
|
110
144
|
console.log(`Prompt Tokens: ${promptTokens} ($${promptCost.toFixed(GeminiTransport.COST_DECIMAL_PLACES)})`);
|
|
111
145
|
console.log(`Candidate Tokens: ${candidateTokens} ($${candidateCost.toFixed(GeminiTransport.COST_DECIMAL_PLACES)})`);
|
|
112
146
|
console.log(`Total Cost: $${totalCost.toFixed(GeminiTransport.COST_DECIMAL_PLACES)}`);
|
|
147
|
+
console.log(`Lifetime Cost: $${this.lifetimeCostUsd.toFixed(GeminiTransport.COST_DECIMAL_PLACES)}`);
|
|
113
148
|
console.log('--------------------------');
|
|
149
|
+
return totalCost;
|
|
114
150
|
}
|
|
115
151
|
toGeminiContents(history, userMessage, attachments) {
|
|
116
152
|
var _a, _b, _c;
|
|
@@ -168,12 +204,17 @@ export class GeminiTransport {
|
|
|
168
204
|
fromGeminiResponse(response) {
|
|
169
205
|
var _a, _b, _c, _d, _e;
|
|
170
206
|
let inputTokens;
|
|
207
|
+
let outputTokens;
|
|
208
|
+
let cost;
|
|
171
209
|
if (response.usageMetadata) {
|
|
172
210
|
const usage = response.usageMetadata;
|
|
173
|
-
this.logTokenUsage((_a = usage.promptTokenCount) !== null && _a !== void 0 ? _a : 0, (_b = usage.candidatesTokenCount) !== null && _b !== void 0 ? _b : 0);
|
|
211
|
+
cost = this.logTokenUsage((_a = usage.promptTokenCount) !== null && _a !== void 0 ? _a : 0, (_b = usage.candidatesTokenCount) !== null && _b !== void 0 ? _b : 0);
|
|
174
212
|
if (usage.promptTokenCount != null) {
|
|
175
213
|
inputTokens = usage.promptTokenCount;
|
|
176
214
|
}
|
|
215
|
+
if (usage.candidatesTokenCount != null) {
|
|
216
|
+
outputTokens = usage.candidatesTokenCount;
|
|
217
|
+
}
|
|
177
218
|
}
|
|
178
219
|
const candidates = response === null || response === void 0 ? void 0 : response.candidates;
|
|
179
220
|
const firstCandidate = candidates === null || candidates === void 0 ? void 0 : candidates[0];
|
|
@@ -211,7 +252,13 @@ export class GeminiTransport {
|
|
|
211
252
|
toolCalls,
|
|
212
253
|
}
|
|
213
254
|
: { role: 'assistant', content: textParts.join('') };
|
|
214
|
-
|
|
255
|
+
if (inputTokens != null)
|
|
256
|
+
base.inputTokens = inputTokens;
|
|
257
|
+
if (outputTokens != null)
|
|
258
|
+
base.outputTokens = outputTokens;
|
|
259
|
+
if (cost != null)
|
|
260
|
+
base.cost = cost;
|
|
261
|
+
return base;
|
|
215
262
|
}
|
|
216
263
|
buildEndpoint(body) {
|
|
217
264
|
if (this.apiKey) {
|
|
@@ -2,6 +2,17 @@ import { __awaiter } from "tslib";
|
|
|
2
2
|
const AI_SERVER_PATH = '/gwf/ai-service/chat-completions';
|
|
3
3
|
const DEFAULT_MODEL = 'gpt-4o-mini';
|
|
4
4
|
const DEFAULT_TIMEOUT = 30000;
|
|
5
|
+
/**
|
|
6
|
+
* Maximum input context window (tokens) per known OpenAI model.
|
|
7
|
+
* OpenAI's model identifier is a free-form string, so this is a best-effort
|
|
8
|
+
* lookup — unknown models return undefined and the context indicator hides.
|
|
9
|
+
* Source: https://platform.openai.com/docs/models
|
|
10
|
+
*/
|
|
11
|
+
const OPENAI_CONTEXT_LIMITS = {
|
|
12
|
+
'gpt-4o': 128000,
|
|
13
|
+
'gpt-4o-mini': 128000,
|
|
14
|
+
'gpt-4-turbo': 128000,
|
|
15
|
+
};
|
|
5
16
|
/**
|
|
6
17
|
* Server-proxy transport for OpenAI. Calls the standard server endpoint.
|
|
7
18
|
* API key stays on the server; client never sees it.
|
|
@@ -15,7 +26,11 @@ export class ServerOpenAITransport {
|
|
|
15
26
|
this.timeout = (_b = config.timeout) !== null && _b !== void 0 ? _b : DEFAULT_TIMEOUT;
|
|
16
27
|
}
|
|
17
28
|
getConfig() {
|
|
18
|
-
return {
|
|
29
|
+
return {
|
|
30
|
+
provider: 'openai',
|
|
31
|
+
model: this.model,
|
|
32
|
+
contextLimit: OPENAI_CONTEXT_LIMITS[this.model],
|
|
33
|
+
};
|
|
19
34
|
}
|
|
20
35
|
sendStructuredPrompt(options) {
|
|
21
36
|
return __awaiter(this, void 0, void 0, function* () {
|