@genesislcap/foundation-ai 14.454.2 → 14.455.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/dts/index.d.ts +2 -1
- package/dist/dts/index.d.ts.map +1 -1
- package/dist/dts/transports/anthropic-transport.d.ts.map +1 -1
- package/dist/dts/transports/gemini-malformed-call.d.ts +17 -0
- package/dist/dts/transports/gemini-malformed-call.d.ts.map +1 -0
- package/dist/dts/transports/gemini-transport.d.ts +10 -0
- package/dist/dts/transports/gemini-transport.d.ts.map +1 -1
- package/dist/dts/types/chat.types.d.ts +87 -9
- package/dist/dts/types/chat.types.d.ts.map +1 -1
- package/dist/dts/utils/temperature.d.ts +49 -0
- package/dist/dts/utils/temperature.d.ts.map +1 -0
- package/dist/esm/index.js +1 -0
- package/dist/esm/transports/anthropic-transport.js +32 -5
- package/dist/esm/transports/gemini-malformed-call.js +242 -0
- package/dist/esm/transports/gemini-transport.js +156 -31
- package/dist/esm/utils/temperature.js +56 -0
- package/dist/foundation-ai.api.json +234 -3
- package/dist/foundation-ai.d.ts +126 -9
- package/package.json +11 -11
|
@@ -2,6 +2,8 @@ import { __awaiter } from "tslib";
|
|
|
2
2
|
import { SUPPORTED_GEMINI_MODEL_IDS, } from '../types';
|
|
3
3
|
import { combineSignals } from '../utils/combine-signals';
|
|
4
4
|
import { logger } from '../utils/logger';
|
|
5
|
+
import { scaleTemperature } from '../utils/temperature';
|
|
6
|
+
import { repairMalformedFunctionCall } from './gemini-malformed-call';
|
|
5
7
|
const GEMINI_DIRECT_URL = (model) => `https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent`;
|
|
6
8
|
const DEFAULT_MODEL = 'gemini-2.5-flash-lite';
|
|
7
9
|
const DEFAULT_TIMEOUT = 180000; // 3 minutes
|
|
@@ -14,6 +16,22 @@ const GEMINI_CONTEXT_LIMITS = {
|
|
|
14
16
|
'gemini-2.5-flash': 1048576,
|
|
15
17
|
'gemini-2.5-flash-lite': 1048576,
|
|
16
18
|
};
|
|
19
|
+
/** Gemini's native default + ceiling temperature — https://ai.google.dev/gemini-api/docs */
|
|
20
|
+
const GEMINI_DEFAULT_TEMPERATURE = 1;
|
|
21
|
+
const GEMINI_MAX_TEMPERATURE = 2;
|
|
22
|
+
/**
 * Map a provider-agnostic {@link ChatToolChoice} to Gemini's `toolConfig`.
 *
 * - `undefined` / `'auto'` → `undefined`, so the request omits the field.
 * - `'required'`           → `functionCallingConfig.mode: 'ANY'`.
 * - `'none'`               → `functionCallingConfig.mode: 'NONE'`.
 * - `{ tool: name }`       → `mode: 'ANY'` restricted to `name` through
 *   `allowedFunctionNames`.
 *
 * Any other value (an unrecognised string, or an object whose `tool` is not a
 * non-empty string) is treated like `'auto'` and yields `undefined`, instead
 * of emitting `allowedFunctionNames: [undefined]`, which JSON-serialises to
 * `[null]`.
 *
 * @param {ChatToolChoice | undefined} choice - Requested tool-call mode.
 * @returns {{ functionCallingConfig: { mode: string, allowedFunctionNames?: string[] } } | undefined}
 *   The Gemini `toolConfig`, or `undefined` when the field should be omitted.
 */
function toGeminiToolConfig(choice) {
    if (!choice || choice === 'auto') {
        return undefined;
    }
    if (choice === 'required') {
        return { functionCallingConfig: { mode: 'ANY' } };
    }
    if (choice === 'none') {
        return { functionCallingConfig: { mode: 'NONE' } };
    }
    // Only the `{ tool: name }` form is left. Reject anything that does not
    // carry a usable name rather than forwarding an undefined entry.
    const toolName = choice.tool;
    if (typeof toolName !== 'string' || toolName === '') {
        return undefined;
    }
    return { functionCallingConfig: { mode: 'ANY', allowedFunctionNames: [toolName] } };
}
|
|
17
35
|
function assertSupportedGeminiModel(model) {
|
|
18
36
|
if (!SUPPORTED_GEMINI_MODEL_IDS.includes(model)) {
|
|
19
37
|
throw new Error(`GeminiTransport: unsupported model "${model}". Use one of: ${SUPPORTED_GEMINI_MODEL_IDS.join(', ')}.`);
|
|
@@ -110,7 +128,7 @@ export class GeminiTransport {
|
|
|
110
128
|
// ── ChatTransport (multi-turn chat) ────────────────────────────────────
|
|
111
129
|
sendChatMessage(history, userMessage, options) {
|
|
112
130
|
return __awaiter(this, void 0, void 0, function* () {
|
|
113
|
-
var _a;
|
|
131
|
+
var _a, _b;
|
|
114
132
|
const contents = this.toGeminiContents(history, userMessage, options === null || options === void 0 ? void 0 : options.attachments);
|
|
115
133
|
const tools = ((_a = options === null || options === void 0 ? void 0 : options.tools) === null || _a === void 0 ? void 0 : _a.length)
|
|
116
134
|
? [
|
|
@@ -126,16 +144,32 @@ export class GeminiTransport {
|
|
|
126
144
|
const systemInstruction = (options === null || options === void 0 ? void 0 : options.systemPrompt)
|
|
127
145
|
? { role: 'system', parts: [{ text: options.systemPrompt }] }
|
|
128
146
|
: undefined;
|
|
129
|
-
//
|
|
130
|
-
//
|
|
131
|
-
//
|
|
132
|
-
//
|
|
133
|
-
//
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
147
|
+
// Map the requested tool-call mode to Gemini's toolConfig — only meaningful
|
|
148
|
+
// when functions are declared. NOTE: forcing (`ANY` / allowedFunctionNames)
|
|
149
|
+
// can make the model batch calls in Python-ish syntax →
|
|
150
|
+
// `MALFORMED_FUNCTION_CALL`, which `fromGeminiResponse` repairs in place when
|
|
151
|
+
// it can confidently parse the call (see `repairMalformedFunctionCall`),
|
|
152
|
+
// falling back to the caller's retry otherwise.
|
|
153
|
+
const toolConfig = tools ? toGeminiToolConfig(options === null || options === void 0 ? void 0 : options.toolChoice) : undefined;
|
|
154
|
+
// Request thought summaries so the model's official reasoning is returned
|
|
155
|
+
// (and a thinking-only turn surfaces *something* rather than going silently
|
|
156
|
+
// blank). Gemini 2.5 Pro always thinks regardless, so this only changes what
|
|
157
|
+
// is RETURNED, not what is billed — see `logTokenUsage` for the thinking-token
|
|
158
|
+
// accounting it lets us finally capture.
|
|
159
|
+
const generationConfig = { thinkingConfig: { includeThoughts: true } };
|
|
160
|
+
// Normalized [0,1] temperature → Gemini's native range, anchored so 0.5
|
|
161
|
+
// maps to its default (native 1) and 1 to its ceiling (native 2).
|
|
162
|
+
if ((options === null || options === void 0 ? void 0 : options.temperature) != null) {
|
|
163
|
+
generationConfig.temperature = scaleTemperature(options.temperature, {
|
|
164
|
+
defaultTemp: GEMINI_DEFAULT_TEMPERATURE,
|
|
165
|
+
maxTemp: GEMINI_MAX_TEMPERATURE,
|
|
166
|
+
});
|
|
167
|
+
}
|
|
168
|
+
// Names of the tools offered this turn — used to validate a repaired
|
|
169
|
+
// malformed call against the real tool surface before accepting it.
|
|
170
|
+
const offeredToolNames = new Set(((_b = options === null || options === void 0 ? void 0 : options.tools) !== null && _b !== void 0 ? _b : []).map((t) => t.name));
|
|
171
|
+
const response = yield this.post({ model: this.model, contents, tools, systemInstruction, toolConfig, generationConfig }, options === null || options === void 0 ? void 0 : options.signal);
|
|
172
|
+
return this.fromGeminiResponse(response, offeredToolNames);
|
|
139
173
|
});
|
|
140
174
|
}
|
|
141
175
|
/**
|
|
@@ -143,15 +177,19 @@ export class GeminiTransport {
|
|
|
143
177
|
* and returns the per-call total so the caller can attach it to the response
|
|
144
178
|
* message.
|
|
145
179
|
*/
|
|
146
|
-
logTokenUsage(promptTokens, candidateTokens) {
|
|
180
|
+
logTokenUsage(promptTokens, candidateTokens, thoughtTokens) {
|
|
147
181
|
const { promptPerMillion, candidatePerMillion } = estimatedGeminiPaidRatesUsdPerMillion(this.model);
|
|
148
182
|
const promptCost = (promptTokens / GeminiTransport.TOKENS_PER_MILLION) * promptPerMillion;
|
|
149
|
-
|
|
183
|
+
// Thinking tokens are billed at the output (candidate) rate. They are
|
|
184
|
+
// incurred whenever the model thinks (always, on 2.5 Pro) — counting them
|
|
185
|
+
// here corrects a long-standing undercount, it does not raise the bill.
|
|
186
|
+
const candidateCost = ((candidateTokens + thoughtTokens) / GeminiTransport.TOKENS_PER_MILLION) *
|
|
187
|
+
candidatePerMillion;
|
|
150
188
|
const totalCost = promptCost + candidateCost;
|
|
151
189
|
this.lifetimeCostUsd += totalCost;
|
|
152
190
|
console.log(`--- Gemini Token Usage (${this.model}) ---`);
|
|
153
191
|
console.log(`Prompt Tokens: ${promptTokens} ($${promptCost.toFixed(GeminiTransport.COST_DECIMAL_PLACES)})`);
|
|
154
|
-
console.log(`Candidate Tokens: ${candidateTokens} ($${candidateCost.toFixed(GeminiTransport.COST_DECIMAL_PLACES)})`);
|
|
192
|
+
console.log(`Candidate Tokens: ${candidateTokens} (+${thoughtTokens} thinking) ($${candidateCost.toFixed(GeminiTransport.COST_DECIMAL_PLACES)})`);
|
|
155
193
|
console.log(`Total Cost: $${totalCost.toFixed(GeminiTransport.COST_DECIMAL_PLACES)}`);
|
|
156
194
|
console.log(`Lifetime Cost: $${this.lifetimeCostUsd.toFixed(GeminiTransport.COST_DECIMAL_PLACES)}`);
|
|
157
195
|
console.log('--------------------------');
|
|
@@ -210,27 +248,32 @@ export class GeminiTransport {
|
|
|
210
248
|
}
|
|
211
249
|
return contents;
|
|
212
250
|
}
|
|
213
|
-
fromGeminiResponse(response) {
|
|
214
|
-
var _a, _b, _c, _d, _e;
|
|
251
|
+
fromGeminiResponse(response, offeredToolNames = new Set()) {
|
|
252
|
+
var _a, _b, _c, _d, _e, _f, _g, _h, _j;
|
|
215
253
|
let inputTokens;
|
|
216
254
|
let outputTokens;
|
|
255
|
+
let thoughtsTokens;
|
|
217
256
|
let cost;
|
|
218
257
|
if (response.usageMetadata) {
|
|
219
258
|
const usage = response.usageMetadata;
|
|
220
|
-
cost = this.logTokenUsage((_a = usage.promptTokenCount) !== null && _a !== void 0 ? _a : 0, (_b = usage.candidatesTokenCount) !== null && _b !== void 0 ? _b : 0);
|
|
259
|
+
cost = this.logTokenUsage((_a = usage.promptTokenCount) !== null && _a !== void 0 ? _a : 0, (_b = usage.candidatesTokenCount) !== null && _b !== void 0 ? _b : 0, (_c = usage.thoughtsTokenCount) !== null && _c !== void 0 ? _c : 0);
|
|
221
260
|
if (usage.promptTokenCount != null) {
|
|
222
261
|
inputTokens = usage.promptTokenCount;
|
|
223
262
|
}
|
|
224
|
-
if (usage.
|
|
225
|
-
|
|
263
|
+
if (usage.thoughtsTokenCount != null) {
|
|
264
|
+
thoughtsTokens = usage.thoughtsTokenCount;
|
|
265
|
+
}
|
|
266
|
+
if (usage.candidatesTokenCount != null || usage.thoughtsTokenCount != null) {
|
|
267
|
+
// Thinking tokens are generated output, billed at the candidate rate, but
|
|
268
|
+
// reported in a field disjoint from `candidatesTokenCount`. Fold them into
|
|
269
|
+
// the provider-agnostic `outputTokens` so it reflects the true generated
|
|
270
|
+
// total — matching Anthropic, whose `output_tokens` already includes them.
|
|
271
|
+
outputTokens = ((_d = usage.candidatesTokenCount) !== null && _d !== void 0 ? _d : 0) + ((_e = usage.thoughtsTokenCount) !== null && _e !== void 0 ? _e : 0);
|
|
226
272
|
}
|
|
227
273
|
}
|
|
228
274
|
const candidates = response === null || response === void 0 ? void 0 : response.candidates;
|
|
229
275
|
const firstCandidate = candidates === null || candidates === void 0 ? void 0 : candidates[0];
|
|
230
|
-
|
|
231
|
-
throw new MalformedFunctionCallError(firstCandidate.finishMessage);
|
|
232
|
-
}
|
|
233
|
-
const parts = (_d = (_c = firstCandidate === null || firstCandidate === void 0 ? void 0 : firstCandidate.content) === null || _c === void 0 ? void 0 : _c.parts) !== null && _d !== void 0 ? _d : [];
|
|
276
|
+
const parts = (_g = (_f = firstCandidate === null || firstCandidate === void 0 ? void 0 : firstCandidate.content) === null || _f === void 0 ? void 0 : _f.parts) !== null && _g !== void 0 ? _g : [];
|
|
234
277
|
const toolCalls = [];
|
|
235
278
|
const thoughtParts = [];
|
|
236
279
|
const textParts = [];
|
|
@@ -239,7 +282,7 @@ export class GeminiTransport {
|
|
|
239
282
|
toolCalls.push({
|
|
240
283
|
id: crypto.randomUUID(),
|
|
241
284
|
name: part.functionCall.name,
|
|
242
|
-
args: (
|
|
285
|
+
args: (_h = part.functionCall.args) !== null && _h !== void 0 ? _h : {},
|
|
243
286
|
});
|
|
244
287
|
}
|
|
245
288
|
else if (part.thought && part.text) {
|
|
@@ -249,26 +292,108 @@ export class GeminiTransport {
|
|
|
249
292
|
textParts.push(part.text);
|
|
250
293
|
}
|
|
251
294
|
}
|
|
252
|
-
//
|
|
253
|
-
//
|
|
254
|
-
//
|
|
255
|
-
//
|
|
256
|
-
//
|
|
295
|
+
// Gemini sometimes flags MALFORMED_FUNCTION_CALL when it produced a complete
|
|
296
|
+
// tool call but serialised it as a Python statement. Recover it in place
|
|
297
|
+
// when we can parse it with full confidence (its `parts` are empty in this
|
|
298
|
+
// case, so the repaired call flows through the normal token/cost handling
|
|
299
|
+
// below); otherwise throw so the ChatDriver retry takes over.
|
|
300
|
+
if ((firstCandidate === null || firstCandidate === void 0 ? void 0 : firstCandidate.finishReason) === 'MALFORMED_FUNCTION_CALL') {
|
|
301
|
+
const repaired = repairMalformedFunctionCall(firstCandidate.finishMessage, offeredToolNames);
|
|
302
|
+
if (!repaired) {
|
|
303
|
+
throw new MalformedFunctionCallError(firstCandidate.finishMessage);
|
|
304
|
+
}
|
|
305
|
+
toolCalls.push({ id: crypto.randomUUID(), name: repaired.name, args: repaired.args });
|
|
306
|
+
}
|
|
307
|
+
// The model's official thinking (thought-summary parts, returned because we
|
|
308
|
+
// set `thinkingConfig.includeThoughts`) is surfaced through the SAME channel
|
|
309
|
+
// as its tool-call narration: both go in `content`, which the driver tags as
|
|
310
|
+
// a toggleable "thinking" block. We combine them so neither is dropped.
|
|
311
|
+
// NOTE: for now real thinking and narration share one block; if the real
|
|
312
|
+
// thinking proves too verbose we may want to split it into its own channel.
|
|
313
|
+
// Final answers surface only the answer text — thought summaries are
|
|
314
|
+
// deliberately not promoted to be the answer.
|
|
315
|
+
const thinking = thoughtParts.join('');
|
|
316
|
+
const narration = textParts.join('');
|
|
257
317
|
const base = toolCalls.length > 0
|
|
258
318
|
? {
|
|
259
319
|
role: 'assistant',
|
|
260
|
-
content:
|
|
320
|
+
content: [thinking, narration].filter(Boolean).join('\n\n'),
|
|
261
321
|
toolCalls,
|
|
262
322
|
}
|
|
263
|
-
: { role: 'assistant', content:
|
|
323
|
+
: { role: 'assistant', content: narration };
|
|
324
|
+
// Blank / non-STOP finishes are the recurring failure mode (especially empty
|
|
325
|
+
// STOP on 2.5 Pro). Log the full shape so the cause is legible from the
|
|
326
|
+
// breakdown rather than inferred. MALFORMED is excluded — it has its own
|
|
327
|
+
// repair/throw path above.
|
|
328
|
+
const finishReason = firstCandidate === null || firstCandidate === void 0 ? void 0 : firstCandidate.finishReason;
|
|
329
|
+
const isBlank = toolCalls.length === 0 && !thinking && !narration;
|
|
330
|
+
if (isBlank ||
|
|
331
|
+
(finishReason && finishReason !== 'STOP' && finishReason !== 'MALFORMED_FUNCTION_CALL')) {
|
|
332
|
+
this.logAbnormalResponse(response, {
|
|
333
|
+
finishReason,
|
|
334
|
+
functionCallParts: toolCalls.length,
|
|
335
|
+
thoughtParts: thoughtParts.length,
|
|
336
|
+
textParts: textParts.length,
|
|
337
|
+
});
|
|
338
|
+
}
|
|
264
339
|
if (inputTokens != null)
|
|
265
340
|
base.inputTokens = inputTokens;
|
|
266
341
|
if (outputTokens != null)
|
|
267
342
|
base.outputTokens = outputTokens;
|
|
268
343
|
if (cost != null)
|
|
269
344
|
base.cost = cost;
|
|
345
|
+
// Surface the provider diagnostic on the message so the driver can fold it
|
|
346
|
+
// into the debug-log meta events (the console log above is dev-only). Only
|
|
347
|
+
// when there's signal — a finish reason, thinking tokens, or a block reason.
|
|
348
|
+
const blockReason = (_j = response.promptFeedback) === null || _j === void 0 ? void 0 : _j.blockReason;
|
|
349
|
+
if (finishReason != null || thoughtsTokens != null || blockReason != null) {
|
|
350
|
+
base.responseMeta = {
|
|
351
|
+
finishReason,
|
|
352
|
+
thoughtsTokens,
|
|
353
|
+
parts: {
|
|
354
|
+
functionCall: toolCalls.length,
|
|
355
|
+
thought: thoughtParts.length,
|
|
356
|
+
text: textParts.length,
|
|
357
|
+
},
|
|
358
|
+
blockReason,
|
|
359
|
+
};
|
|
360
|
+
}
|
|
270
361
|
return base;
|
|
271
362
|
}
|
|
363
|
+
/**
|
|
364
|
+
* Log the full shape of a blank or non-STOP response so its cause is legible
|
|
365
|
+
* without re-deriving it: a thinking-only STOP (substantial `thoughtsTokenCount`,
|
|
366
|
+
* ~0 `candidatesTokenCount`) vs a content block (`SAFETY` / `RECITATION`) vs a
|
|
367
|
+
* token cap (`MAX_TOKENS`) vs a prompt-level block (top-level
|
|
368
|
+
* `promptFeedback.blockReason`). On 2.5 Pro — which always thinks — a blank
|
|
369
|
+
* STOP carrying substantial thought tokens is the "thought, then stopped
|
|
370
|
+
* without answering" signature.
|
|
371
|
+
*/
|
|
372
|
+
logAbnormalResponse(response, parsed) {
|
|
373
|
+
var _a, _b, _c;
|
|
374
|
+
const usage = ((_a = response.usageMetadata) !== null && _a !== void 0 ? _a : {});
|
|
375
|
+
const candidate = (_c = (_b = response.candidates) === null || _b === void 0 ? void 0 : _b[0]) !== null && _c !== void 0 ? _c : {};
|
|
376
|
+
logger.warn('GeminiTransport: blank/non-STOP response', {
|
|
377
|
+
model: this.model,
|
|
378
|
+
finishReason: parsed.finishReason,
|
|
379
|
+
finishMessage: candidate.finishMessage,
|
|
380
|
+
parts: {
|
|
381
|
+
total: parsed.functionCallParts + parsed.thoughtParts + parsed.textParts,
|
|
382
|
+
functionCall: parsed.functionCallParts,
|
|
383
|
+
thought: parsed.thoughtParts,
|
|
384
|
+
text: parsed.textParts,
|
|
385
|
+
},
|
|
386
|
+
usage: {
|
|
387
|
+
promptTokenCount: usage.promptTokenCount,
|
|
388
|
+
candidatesTokenCount: usage.candidatesTokenCount,
|
|
389
|
+
thoughtsTokenCount: usage.thoughtsTokenCount,
|
|
390
|
+
totalTokenCount: usage.totalTokenCount,
|
|
391
|
+
},
|
|
392
|
+
promptFeedback: response.promptFeedback,
|
|
393
|
+
safetyRatings: candidate.safetyRatings,
|
|
394
|
+
citationMetadata: candidate.citationMetadata,
|
|
395
|
+
});
|
|
396
|
+
}
|
|
272
397
|
buildEndpoint(body) {
|
|
273
398
|
if (this.apiKey) {
|
|
274
399
|
return {
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
/**
 * Normalized-temperature anchor. Values at or below it scale into
 * `[0, providerDefault]`; values above it scale into `[providerDefault, providerMax]`.
 * So a normalized `0.5` maps to each provider's own default temperature.
 */
const DEFAULT_ANCHOR = 0.5;
/**
 * Translate a provider-agnostic, normalized temperature into a provider's
 * native range, anchored on the provider's own default so the normalized scale
 * means the same thing everywhere:
 *
 * - `0`   → `0` (fully deterministic),
 * - `0.5` → the provider's default temperature,
 * - `1`   → the provider's maximum.
 *
 * Values between the anchors interpolate linearly within each half, so `< 0.5`
 * is "more focused than default" and `> 0.5` is "more random than default" on
 * every provider — even though the providers' native ranges differ (Anthropic
 * default/max `1`/`1`, Gemini `1`/`2`). Where a provider's default equals its
 * max (Anthropic), the upper half is flat: it cannot go hotter than its default.
 *
 * The input is clamped to `[0, 1]` before scaling. An input that is not a
 * number at all (`NaN`, `undefined`, a non-numeric string) is treated as the
 * anchor and therefore maps to the provider's default, so the result is never
 * `NaN`.
 *
 * @param {number} normalized - Requested temperature in normalized `0`–`1` space.
 * @param {{ defaultTemp: number, maxTemp: number }} range - The provider's
 *   native default and maximum temperature.
 * @returns {number} The temperature in the provider's native range.
 */
export function scaleTemperature(normalized, { defaultTemp, maxTemp }) {
    const clamped = Math.min(Math.max(normalized, 0), 1);
    // Math.max / Math.min propagate NaN, so the clamp alone does not contain
    // it; fall back to the anchor (the provider default) in that case.
    const t = Number.isNaN(clamped) ? DEFAULT_ANCHOR : clamped;
    if (t <= DEFAULT_ANCHOR) {
        // Lower half: [0, anchor] → [0, defaultTemp].
        return (t / DEFAULT_ANCHOR) * defaultTemp;
    }
    // Upper half: (anchor, 1] → (defaultTemp, maxTemp].
    return defaultTemp + ((t - DEFAULT_ANCHOR) / (1 - DEFAULT_ANCHOR)) * (maxTemp - defaultTemp);
}
|
|
31
|
+
/**
 * Provider-agnostic, normalized sampling-temperature presets in `0`–`1` space —
 * named handles for the values most callers actually want, so intent reads
 * better than a bare magnitude. Each maps through `scaleTemperature` to the
 * active provider's native range, so the same preset means the same intent
 * whichever provider services the turn:
 *
 * - `ChatTemperature.Deterministic` (`0`) — greedy/argmax sampling.
 * - `ChatTemperature.Focused` (`0.25`) — low but not greedy; precise tool calls
 *   and extraction work where you still want a little slack.
 * - `ChatTemperature.Balanced` (`0.5`) — the provider's own default.
 * - `ChatTemperature.Creative` (`0.75`) — hotter than default, short of the ceiling.
 * - `ChatTemperature.Maximum` (`1`) — the hottest the active provider allows.
 *
 * (On a provider whose default equals its max — Anthropic — `Creative` and
 * `Maximum` coincide; see `scaleTemperature`.)
 *
 * The object is frozen: the presets are shared by every importer, so a write
 * from one consumer must not change what the names mean for the others.
 *
 * @beta
 */
export const ChatTemperature = Object.freeze({
    Deterministic: 0,
    Focused: 0.25,
    Balanced: 0.5,
    Creative: 0.75,
    Maximum: 1,
});
|
|
@@ -3282,6 +3282,34 @@
|
|
|
3282
3282
|
"endIndex": 2
|
|
3283
3283
|
}
|
|
3284
3284
|
},
|
|
3285
|
+
{
|
|
3286
|
+
"kind": "PropertySignature",
|
|
3287
|
+
"canonicalReference": "@genesislcap/foundation-ai!ChatMessage#responseMeta:member",
|
|
3288
|
+
"docComment": "/**\n * Provider diagnostic for the request that produced this message — the raw finish reason plus, where the provider reports them, the reasoning-token count and a parts breakdown. Set by transports that expose it (Gemini); the driver folds it into the debug-log meta events (e.g. an empty-response `turn.retry`/`turn.error`) so a blank or truncated turn's cause is legible. Not shown to the user.\n */\n",
|
|
3289
|
+
"excerptTokens": [
|
|
3290
|
+
{
|
|
3291
|
+
"kind": "Content",
|
|
3292
|
+
"text": "responseMeta?: "
|
|
3293
|
+
},
|
|
3294
|
+
{
|
|
3295
|
+
"kind": "Reference",
|
|
3296
|
+
"text": "ChatResponseMeta",
|
|
3297
|
+
"canonicalReference": "@genesislcap/foundation-ai!ChatResponseMeta:interface"
|
|
3298
|
+
},
|
|
3299
|
+
{
|
|
3300
|
+
"kind": "Content",
|
|
3301
|
+
"text": ";"
|
|
3302
|
+
}
|
|
3303
|
+
],
|
|
3304
|
+
"isReadonly": false,
|
|
3305
|
+
"isOptional": true,
|
|
3306
|
+
"releaseTag": "Beta",
|
|
3307
|
+
"name": "responseMeta",
|
|
3308
|
+
"propertyTypeTokenRange": {
|
|
3309
|
+
"startIndex": 1,
|
|
3310
|
+
"endIndex": 2
|
|
3311
|
+
}
|
|
3312
|
+
},
|
|
3285
3313
|
{
|
|
3286
3314
|
"kind": "PropertySignature",
|
|
3287
3315
|
"canonicalReference": "@genesislcap/foundation-ai!ChatMessage#role:member",
|
|
@@ -3529,18 +3557,46 @@
|
|
|
3529
3557
|
"endIndex": 2
|
|
3530
3558
|
}
|
|
3531
3559
|
},
|
|
3560
|
+
{
|
|
3561
|
+
"kind": "PropertySignature",
|
|
3562
|
+
"canonicalReference": "@genesislcap/foundation-ai!ChatRequestOptions#temperature:member",
|
|
3563
|
+
"docComment": "/**\n * Provider-agnostic sampling temperature, normalized to `0`–`1` and anchored on each provider's own default: `0` is fully deterministic, `0.5` is the provider's default, and `1` is the most random it allows. So `< 0.5` is \"more focused than default\" and `> 0.5` is \"more random than default\" on every provider, even though their native ranges differ (Anthropic `temperature` `0`–`1`, Gemini `generationConfig.temperature` `0`–`2`). Where a provider's default equals its max (Anthropic), the upper half is flat. Values outside `0`–`1` are clamped. Omit to use the provider/model default (equivalent to `0.5`). Prefer the `ChatTemperature` presets for common intents.\n *\n * @beta\n */\n",
|
|
3564
|
+
"excerptTokens": [
|
|
3565
|
+
{
|
|
3566
|
+
"kind": "Content",
|
|
3567
|
+
"text": "temperature?: "
|
|
3568
|
+
},
|
|
3569
|
+
{
|
|
3570
|
+
"kind": "Content",
|
|
3571
|
+
"text": "number"
|
|
3572
|
+
},
|
|
3573
|
+
{
|
|
3574
|
+
"kind": "Content",
|
|
3575
|
+
"text": ";"
|
|
3576
|
+
}
|
|
3577
|
+
],
|
|
3578
|
+
"isReadonly": false,
|
|
3579
|
+
"isOptional": true,
|
|
3580
|
+
"releaseTag": "Beta",
|
|
3581
|
+
"name": "temperature",
|
|
3582
|
+
"propertyTypeTokenRange": {
|
|
3583
|
+
"startIndex": 1,
|
|
3584
|
+
"endIndex": 2
|
|
3585
|
+
}
|
|
3586
|
+
},
|
|
3532
3587
|
{
|
|
3533
3588
|
"kind": "PropertySignature",
|
|
3534
3589
|
"canonicalReference": "@genesislcap/foundation-ai!ChatRequestOptions#toolChoice:member",
|
|
3535
|
-
"docComment": "/**\n * Whether the model
|
|
3590
|
+
"docComment": "/**\n * Whether (and how) the model may call a tool this turn. Defaults to `'auto'` when omitted. Used by sub-agent loops (which force `'required'` so a turn can only end via a tool call) and configurable per agent / per agent state. See {@link ChatToolChoice}.\n *\n * @beta\n */\n",
|
|
3536
3591
|
"excerptTokens": [
|
|
3537
3592
|
{
|
|
3538
3593
|
"kind": "Content",
|
|
3539
3594
|
"text": "toolChoice?: "
|
|
3540
3595
|
},
|
|
3541
3596
|
{
|
|
3542
|
-
"kind": "
|
|
3543
|
-
"text": "
|
|
3597
|
+
"kind": "Reference",
|
|
3598
|
+
"text": "ChatToolChoice",
|
|
3599
|
+
"canonicalReference": "@genesislcap/foundation-ai!ChatToolChoice:type"
|
|
3544
3600
|
},
|
|
3545
3601
|
{
|
|
3546
3602
|
"kind": "Content",
|
|
@@ -3591,6 +3647,132 @@
|
|
|
3591
3647
|
],
|
|
3592
3648
|
"extendsTokenRanges": []
|
|
3593
3649
|
},
|
|
3650
|
+
{
|
|
3651
|
+
"kind": "Interface",
|
|
3652
|
+
"canonicalReference": "@genesislcap/foundation-ai!ChatResponseMeta:interface",
|
|
3653
|
+
"docComment": "/**\n * Provider-reported diagnostic for a single chat response, surfaced so the driver can attribute a blank/abnormal turn without re-deriving it. All fields are optional — a transport sets only what its provider reports.\n *\n * @beta\n */\n",
|
|
3654
|
+
"excerptTokens": [
|
|
3655
|
+
{
|
|
3656
|
+
"kind": "Content",
|
|
3657
|
+
"text": "export interface ChatResponseMeta "
|
|
3658
|
+
}
|
|
3659
|
+
],
|
|
3660
|
+
"fileUrlPath": "src/types/chat.types.ts",
|
|
3661
|
+
"releaseTag": "Beta",
|
|
3662
|
+
"name": "ChatResponseMeta",
|
|
3663
|
+
"preserveMemberOrder": false,
|
|
3664
|
+
"members": [
|
|
3665
|
+
{
|
|
3666
|
+
"kind": "PropertySignature",
|
|
3667
|
+
"canonicalReference": "@genesislcap/foundation-ai!ChatResponseMeta#blockReason:member",
|
|
3668
|
+
"docComment": "/**\n * Provider block reason when the prompt/response was blocked rather than generated.\n */\n",
|
|
3669
|
+
"excerptTokens": [
|
|
3670
|
+
{
|
|
3671
|
+
"kind": "Content",
|
|
3672
|
+
"text": "blockReason?: "
|
|
3673
|
+
},
|
|
3674
|
+
{
|
|
3675
|
+
"kind": "Content",
|
|
3676
|
+
"text": "string"
|
|
3677
|
+
},
|
|
3678
|
+
{
|
|
3679
|
+
"kind": "Content",
|
|
3680
|
+
"text": ";"
|
|
3681
|
+
}
|
|
3682
|
+
],
|
|
3683
|
+
"isReadonly": false,
|
|
3684
|
+
"isOptional": true,
|
|
3685
|
+
"releaseTag": "Beta",
|
|
3686
|
+
"name": "blockReason",
|
|
3687
|
+
"propertyTypeTokenRange": {
|
|
3688
|
+
"startIndex": 1,
|
|
3689
|
+
"endIndex": 2
|
|
3690
|
+
}
|
|
3691
|
+
},
|
|
3692
|
+
{
|
|
3693
|
+
"kind": "PropertySignature",
|
|
3694
|
+
"canonicalReference": "@genesislcap/foundation-ai!ChatResponseMeta#finishReason:member",
|
|
3695
|
+
"docComment": "/**\n * Raw provider finish reason for the turn, verbatim — e.g. Gemini `'STOP'` | `'MAX_TOKENS'` | `'SAFETY'` | `'RECITATION'`. The key signal when triaging a blank or truncated turn.\n */\n",
|
|
3696
|
+
"excerptTokens": [
|
|
3697
|
+
{
|
|
3698
|
+
"kind": "Content",
|
|
3699
|
+
"text": "finishReason?: "
|
|
3700
|
+
},
|
|
3701
|
+
{
|
|
3702
|
+
"kind": "Content",
|
|
3703
|
+
"text": "string"
|
|
3704
|
+
},
|
|
3705
|
+
{
|
|
3706
|
+
"kind": "Content",
|
|
3707
|
+
"text": ";"
|
|
3708
|
+
}
|
|
3709
|
+
],
|
|
3710
|
+
"isReadonly": false,
|
|
3711
|
+
"isOptional": true,
|
|
3712
|
+
"releaseTag": "Beta",
|
|
3713
|
+
"name": "finishReason",
|
|
3714
|
+
"propertyTypeTokenRange": {
|
|
3715
|
+
"startIndex": 1,
|
|
3716
|
+
"endIndex": 2
|
|
3717
|
+
}
|
|
3718
|
+
},
|
|
3719
|
+
{
|
|
3720
|
+
"kind": "PropertySignature",
|
|
3721
|
+
"canonicalReference": "@genesislcap/foundation-ai!ChatResponseMeta#parts:member",
|
|
3722
|
+
"docComment": "/**\n * Count of response parts by kind — distinguishes a truly empty turn from a thinking-only one.\n */\n",
|
|
3723
|
+
"excerptTokens": [
|
|
3724
|
+
{
|
|
3725
|
+
"kind": "Content",
|
|
3726
|
+
"text": "parts?: "
|
|
3727
|
+
},
|
|
3728
|
+
{
|
|
3729
|
+
"kind": "Content",
|
|
3730
|
+
"text": "{\n functionCall: number;\n thought: number;\n text: number;\n }"
|
|
3731
|
+
},
|
|
3732
|
+
{
|
|
3733
|
+
"kind": "Content",
|
|
3734
|
+
"text": ";"
|
|
3735
|
+
}
|
|
3736
|
+
],
|
|
3737
|
+
"isReadonly": false,
|
|
3738
|
+
"isOptional": true,
|
|
3739
|
+
"releaseTag": "Beta",
|
|
3740
|
+
"name": "parts",
|
|
3741
|
+
"propertyTypeTokenRange": {
|
|
3742
|
+
"startIndex": 1,
|
|
3743
|
+
"endIndex": 2
|
|
3744
|
+
}
|
|
3745
|
+
},
|
|
3746
|
+
{
|
|
3747
|
+
"kind": "PropertySignature",
|
|
3748
|
+
"canonicalReference": "@genesislcap/foundation-ai!ChatResponseMeta#thoughtsTokens:member",
|
|
3749
|
+
"docComment": "/**\n * Reasoning (\"thinking\") tokens billed for this turn, when the provider reports them (Gemini 2.5). A high count alongside ~0 output tokens and a `'STOP'` finish is the \"thought, then stopped without answering\" signature.\n */\n",
|
|
3750
|
+
"excerptTokens": [
|
|
3751
|
+
{
|
|
3752
|
+
"kind": "Content",
|
|
3753
|
+
"text": "thoughtsTokens?: "
|
|
3754
|
+
},
|
|
3755
|
+
{
|
|
3756
|
+
"kind": "Content",
|
|
3757
|
+
"text": "number"
|
|
3758
|
+
},
|
|
3759
|
+
{
|
|
3760
|
+
"kind": "Content",
|
|
3761
|
+
"text": ";"
|
|
3762
|
+
}
|
|
3763
|
+
],
|
|
3764
|
+
"isReadonly": false,
|
|
3765
|
+
"isOptional": true,
|
|
3766
|
+
"releaseTag": "Beta",
|
|
3767
|
+
"name": "thoughtsTokens",
|
|
3768
|
+
"propertyTypeTokenRange": {
|
|
3769
|
+
"startIndex": 1,
|
|
3770
|
+
"endIndex": 2
|
|
3771
|
+
}
|
|
3772
|
+
}
|
|
3773
|
+
],
|
|
3774
|
+
"extendsTokenRanges": []
|
|
3775
|
+
},
|
|
3594
3776
|
{
|
|
3595
3777
|
"kind": "TypeAlias",
|
|
3596
3778
|
"canonicalReference": "@genesislcap/foundation-ai!ChatRole:type",
|
|
@@ -3747,6 +3929,29 @@
|
|
|
3747
3929
|
"endIndex": 2
|
|
3748
3930
|
}
|
|
3749
3931
|
},
|
|
3932
|
+
{
|
|
3933
|
+
"kind": "Variable",
|
|
3934
|
+
"canonicalReference": "@genesislcap/foundation-ai!ChatTemperature:var",
|
|
3935
|
+
"docComment": "/**\n * Provider-agnostic, normalized sampling-temperature presets in `0`–`1` space — named handles for the values most callers actually want, so intent reads better than a bare magnitude. Each maps through `scaleTemperature` to the active provider's native range, so the same preset means the same intent whichever provider services the turn:\n *\n * - `ChatTemperature.Deterministic` (`0`) — greedy/argmax sampling. - `ChatTemperature.Focused` (`0.25`) — low but not greedy; precise tool calls and extraction work where you still want a little slack. - `ChatTemperature.Balanced` (`0.5`) — the provider's own default. - `ChatTemperature.Creative` (`0.75`) — hotter than default, short of the ceiling. - `ChatTemperature.Maximum` (`1`) — the hottest the active provider allows.\n *\n * (On a provider whose default equals its max — Anthropic — `Creative` and `Maximum` coincide; see `scaleTemperature`.)\n *\n * @beta\n */\n",
|
|
3936
|
+
"excerptTokens": [
|
|
3937
|
+
{
|
|
3938
|
+
"kind": "Content",
|
|
3939
|
+
"text": "ChatTemperature: "
|
|
3940
|
+
},
|
|
3941
|
+
{
|
|
3942
|
+
"kind": "Content",
|
|
3943
|
+
"text": "{\n readonly Deterministic: 0;\n readonly Focused: 0.25;\n readonly Balanced: 0.5;\n readonly Creative: 0.75;\n readonly Maximum: 1;\n}"
|
|
3944
|
+
}
|
|
3945
|
+
],
|
|
3946
|
+
"fileUrlPath": "src/utils/temperature.ts",
|
|
3947
|
+
"isReadonly": true,
|
|
3948
|
+
"releaseTag": "Beta",
|
|
3949
|
+
"name": "ChatTemperature",
|
|
3950
|
+
"variableTypeTokenRange": {
|
|
3951
|
+
"startIndex": 1,
|
|
3952
|
+
"endIndex": 2
|
|
3953
|
+
}
|
|
3954
|
+
},
|
|
3750
3955
|
{
|
|
3751
3956
|
"kind": "TypeAlias",
|
|
3752
3957
|
"canonicalReference": "@genesislcap/foundation-ai!ChatToolCall:type",
|
|
@@ -3814,6 +4019,32 @@
|
|
|
3814
4019
|
"endIndex": 3
|
|
3815
4020
|
}
|
|
3816
4021
|
},
|
|
4022
|
+
{
|
|
4023
|
+
"kind": "TypeAlias",
|
|
4024
|
+
"canonicalReference": "@genesislcap/foundation-ai!ChatToolChoice:type",
|
|
4025
|
+
"docComment": "/**\n * Controls whether (and how) the model may call a tool on a given turn. Maps to each provider's \"tool choice\" / \"function calling mode\" control:\n *\n * - `'auto'` (the default when omitted) — the model decides whether to call a tool or answer with text. Anthropic leaves `tool_choice` unset; Gemini leaves `functionCallingConfig` unset (`AUTO`). - `'required'` — the model MUST call one of the available tools. Maps to Anthropic `tool_choice: { type: 'any' }` and Gemini `functionCallingConfig.mode: 'ANY'`. - `'none'` — the model MUST NOT call a tool (text answer only). Maps to Anthropic `tool_choice: { type: 'none' }` and Gemini `functionCallingConfig.mode: 'NONE'`. - `{ tool: name }` — the model MUST call exactly the named tool. Maps to Anthropic `tool_choice: { type: 'tool', name }` and Gemini `functionCallingConfig.mode: 'ANY', allowedFunctionNames: [name]`. Use this for surgical forcing at a single-tool juncture (e.g. force a classifier tool in an intake step) while leaving `'auto'` everywhere multi-step work happens.\n *\n * Forcing (`'required'` / `{ tool }`) is a no-op when no tools are advertised.\n *\n * NOTE: forcing is incompatible with Anthropic extended/adaptive thinking — a request must not enable both.\n *\n * @beta\n */\n",
|
|
4026
|
+
"excerptTokens": [
|
|
4027
|
+
{
|
|
4028
|
+
"kind": "Content",
|
|
4029
|
+
"text": "export type ChatToolChoice = "
|
|
4030
|
+
},
|
|
4031
|
+
{
|
|
4032
|
+
"kind": "Content",
|
|
4033
|
+
"text": "'auto' | 'required' | 'none' | {\n tool: string;\n}"
|
|
4034
|
+
},
|
|
4035
|
+
{
|
|
4036
|
+
"kind": "Content",
|
|
4037
|
+
"text": ";"
|
|
4038
|
+
}
|
|
4039
|
+
],
|
|
4040
|
+
"fileUrlPath": "src/types/chat.types.ts",
|
|
4041
|
+
"releaseTag": "Beta",
|
|
4042
|
+
"name": "ChatToolChoice",
|
|
4043
|
+
"typeTokenRange": {
|
|
4044
|
+
"startIndex": 1,
|
|
4045
|
+
"endIndex": 2
|
|
4046
|
+
}
|
|
4047
|
+
},
|
|
3817
4048
|
{
|
|
3818
4049
|
"kind": "Interface",
|
|
3819
4050
|
"canonicalReference": "@genesislcap/foundation-ai!ChatToolDefinition:interface",
|