@genesislcap/foundation-ai 14.455.0 → 14.455.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,8 @@ import { __awaiter } from "tslib";
2
2
  import { SUPPORTED_GEMINI_MODEL_IDS, } from '../types';
3
3
  import { combineSignals } from '../utils/combine-signals';
4
4
  import { logger } from '../utils/logger';
5
+ import { scaleTemperature } from '../utils/temperature';
6
+ import { repairMalformedFunctionCall } from './gemini-malformed-call';
5
7
  const GEMINI_DIRECT_URL = (model) => `https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent`;
6
8
  const DEFAULT_MODEL = 'gemini-2.5-flash-lite';
7
9
  const DEFAULT_TIMEOUT = 180000; // 3 minutes
@@ -14,6 +16,22 @@ const GEMINI_CONTEXT_LIMITS = {
14
16
  'gemini-2.5-flash': 1048576,
15
17
  'gemini-2.5-flash-lite': 1048576,
16
18
  };
19
+ /** Gemini's native default + ceiling temperature — https://ai.google.dev/gemini-api/docs */
20
+ const GEMINI_DEFAULT_TEMPERATURE = 1;
21
+ const GEMINI_MAX_TEMPERATURE = 2;
22
/**
 * Map a provider-agnostic {@link ChatToolChoice} to Gemini's `toolConfig`.
 *
 * @param choice - `'auto'`, `'required'`, `'none'`, `{ tool: name }`, or a
 *   falsy value (treated like `'auto'`).
 * @returns The `toolConfig` payload, or `undefined` for the default (`'auto'`)
 *   so the request omits the field.
 * @throws {Error} When `choice` is none of the recognised values, or names an
 *   empty / non-string tool. Failing here gives a readable message instead of
 *   sending `allowedFunctionNames: [undefined]` to the provider.
 */
function toGeminiToolConfig(choice) {
    if (!choice || choice === 'auto') {
        return undefined;
    }
    if (choice === 'required') {
        return { functionCallingConfig: { mode: 'ANY' } };
    }
    if (choice === 'none') {
        return { functionCallingConfig: { mode: 'NONE' } };
    }
    // Anything left must be the `{ tool: name }` form; reject every other shape
    // (unknown strings, objects without a usable `tool`) before it is serialised.
    const namesATool = typeof choice === 'object' && typeof choice.tool === 'string' && choice.tool !== '';
    if (!namesATool) {
        const shown = typeof choice === 'string' ? `"${choice}"` : 'value';
        throw new Error(`GeminiTransport: unsupported toolChoice ${shown}. Use 'auto', 'required', 'none' or { tool: <function name> }.`);
    }
    return { functionCallingConfig: { mode: 'ANY', allowedFunctionNames: [choice.tool] } };
}
17
35
  function assertSupportedGeminiModel(model) {
18
36
  if (!SUPPORTED_GEMINI_MODEL_IDS.includes(model)) {
19
37
  throw new Error(`GeminiTransport: unsupported model "${model}". Use one of: ${SUPPORTED_GEMINI_MODEL_IDS.join(', ')}.`);
@@ -110,7 +128,7 @@ export class GeminiTransport {
110
128
  // ── ChatTransport (multi-turn chat) ────────────────────────────────────
111
129
  sendChatMessage(history, userMessage, options) {
112
130
  return __awaiter(this, void 0, void 0, function* () {
113
- var _a;
131
+ var _a, _b;
114
132
  const contents = this.toGeminiContents(history, userMessage, options === null || options === void 0 ? void 0 : options.attachments);
115
133
  const tools = ((_a = options === null || options === void 0 ? void 0 : options.tools) === null || _a === void 0 ? void 0 : _a.length)
116
134
  ? [
@@ -126,16 +144,32 @@ export class GeminiTransport {
126
144
  const systemInstruction = (options === null || options === void 0 ? void 0 : options.systemPrompt)
127
145
  ? { role: 'system', parts: [{ text: options.systemPrompt }] }
128
146
  : undefined;
129
- // `toolChoice: 'required'` forces a function call (`mode: 'ANY'`) instead of
130
- // a text answer used by sub-agent loops so the turn can only end via a
131
- // tool call. Only meaningful when functions are declared. Forcing ANY can
132
- // make the model batch calls in Python-ish syntax → `MALFORMED_FUNCTION_CALL`,
133
- // which `fromGeminiResponse` already detects and the caller retries.
134
- const toolConfig = (options === null || options === void 0 ? void 0 : options.toolChoice) === 'required' && tools
135
- ? { functionCallingConfig: { mode: 'ANY' } }
136
- : undefined;
137
- const response = yield this.post({ model: this.model, contents, tools, systemInstruction, toolConfig }, options === null || options === void 0 ? void 0 : options.signal);
138
- return this.fromGeminiResponse(response);
147
+ // Map the requested tool-call mode to Gemini's toolConfig — only meaningful
148
+ // when functions are declared. NOTE: forcing (`ANY` / allowedFunctionNames)
149
+ // can make the model batch calls in Python-ish syntax →
150
+ // `MALFORMED_FUNCTION_CALL`, which `fromGeminiResponse` repairs in place when
151
+ // it can confidently parse the call (see `repairMalformedFunctionCall`),
152
+ // falling back to the caller's retry otherwise.
153
+ const toolConfig = tools ? toGeminiToolConfig(options === null || options === void 0 ? void 0 : options.toolChoice) : undefined;
154
+ // Request thought summaries so the model's official reasoning is returned
155
+ // (and a thinking-only turn surfaces *something* rather than going silently
156
+ // blank). Gemini 2.5 Pro always thinks regardless, so this only changes what
157
+ // is RETURNED, not what is billed — see `logTokenUsage` for the thinking-token
158
+ // accounting it lets us finally capture.
159
+ const generationConfig = { thinkingConfig: { includeThoughts: true } };
160
+ // Normalized [0,1] temperature → Gemini's native range, anchored so 0.5
161
+ // maps to its default (native 1) and 1 to its ceiling (native 2).
162
+ if ((options === null || options === void 0 ? void 0 : options.temperature) != null) {
163
+ generationConfig.temperature = scaleTemperature(options.temperature, {
164
+ defaultTemp: GEMINI_DEFAULT_TEMPERATURE,
165
+ maxTemp: GEMINI_MAX_TEMPERATURE,
166
+ });
167
+ }
168
+ // Names of the tools offered this turn — used to validate a repaired
169
+ // malformed call against the real tool surface before accepting it.
170
+ const offeredToolNames = new Set(((_b = options === null || options === void 0 ? void 0 : options.tools) !== null && _b !== void 0 ? _b : []).map((t) => t.name));
171
+ const response = yield this.post({ model: this.model, contents, tools, systemInstruction, toolConfig, generationConfig }, options === null || options === void 0 ? void 0 : options.signal);
172
+ return this.fromGeminiResponse(response, offeredToolNames);
139
173
  });
140
174
  }
141
175
  /**
@@ -143,15 +177,19 @@ export class GeminiTransport {
143
177
  * and returns the per-call total so the caller can attach it to the response
144
178
  * message.
145
179
  */
146
- logTokenUsage(promptTokens, candidateTokens) {
180
+ logTokenUsage(promptTokens, candidateTokens, thoughtTokens) {
147
181
  const { promptPerMillion, candidatePerMillion } = estimatedGeminiPaidRatesUsdPerMillion(this.model);
148
182
  const promptCost = (promptTokens / GeminiTransport.TOKENS_PER_MILLION) * promptPerMillion;
149
- const candidateCost = (candidateTokens / GeminiTransport.TOKENS_PER_MILLION) * candidatePerMillion;
183
+ // Thinking tokens are billed at the output (candidate) rate. They are
184
+ // incurred whenever the model thinks (always, on 2.5 Pro) — counting them
185
+ // here corrects a long-standing undercount, it does not raise the bill.
186
+ const candidateCost = ((candidateTokens + thoughtTokens) / GeminiTransport.TOKENS_PER_MILLION) *
187
+ candidatePerMillion;
150
188
  const totalCost = promptCost + candidateCost;
151
189
  this.lifetimeCostUsd += totalCost;
152
190
  console.log(`--- Gemini Token Usage (${this.model}) ---`);
153
191
  console.log(`Prompt Tokens: ${promptTokens} ($${promptCost.toFixed(GeminiTransport.COST_DECIMAL_PLACES)})`);
154
- console.log(`Candidate Tokens: ${candidateTokens} ($${candidateCost.toFixed(GeminiTransport.COST_DECIMAL_PLACES)})`);
192
+ console.log(`Candidate Tokens: ${candidateTokens} (+${thoughtTokens} thinking) ($${candidateCost.toFixed(GeminiTransport.COST_DECIMAL_PLACES)})`);
155
193
  console.log(`Total Cost: $${totalCost.toFixed(GeminiTransport.COST_DECIMAL_PLACES)}`);
156
194
  console.log(`Lifetime Cost: $${this.lifetimeCostUsd.toFixed(GeminiTransport.COST_DECIMAL_PLACES)}`);
157
195
  console.log('--------------------------');
@@ -210,27 +248,32 @@ export class GeminiTransport {
210
248
  }
211
249
  return contents;
212
250
  }
213
- fromGeminiResponse(response) {
214
- var _a, _b, _c, _d, _e;
251
+ fromGeminiResponse(response, offeredToolNames = new Set()) {
252
+ var _a, _b, _c, _d, _e, _f, _g, _h, _j;
215
253
  let inputTokens;
216
254
  let outputTokens;
255
+ let thoughtsTokens;
217
256
  let cost;
218
257
  if (response.usageMetadata) {
219
258
  const usage = response.usageMetadata;
220
- cost = this.logTokenUsage((_a = usage.promptTokenCount) !== null && _a !== void 0 ? _a : 0, (_b = usage.candidatesTokenCount) !== null && _b !== void 0 ? _b : 0);
259
+ cost = this.logTokenUsage((_a = usage.promptTokenCount) !== null && _a !== void 0 ? _a : 0, (_b = usage.candidatesTokenCount) !== null && _b !== void 0 ? _b : 0, (_c = usage.thoughtsTokenCount) !== null && _c !== void 0 ? _c : 0);
221
260
  if (usage.promptTokenCount != null) {
222
261
  inputTokens = usage.promptTokenCount;
223
262
  }
224
- if (usage.candidatesTokenCount != null) {
225
- outputTokens = usage.candidatesTokenCount;
263
+ if (usage.thoughtsTokenCount != null) {
264
+ thoughtsTokens = usage.thoughtsTokenCount;
265
+ }
266
+ if (usage.candidatesTokenCount != null || usage.thoughtsTokenCount != null) {
267
+ // Thinking tokens are generated output, billed at the candidate rate, but
268
+ // reported in a field disjoint from `candidatesTokenCount`. Fold them into
269
+ // the provider-agnostic `outputTokens` so it reflects the true generated
270
+ // total — matching Anthropic, whose `output_tokens` already includes them.
271
+ outputTokens = ((_d = usage.candidatesTokenCount) !== null && _d !== void 0 ? _d : 0) + ((_e = usage.thoughtsTokenCount) !== null && _e !== void 0 ? _e : 0);
226
272
  }
227
273
  }
228
274
  const candidates = response === null || response === void 0 ? void 0 : response.candidates;
229
275
  const firstCandidate = candidates === null || candidates === void 0 ? void 0 : candidates[0];
230
- if ((firstCandidate === null || firstCandidate === void 0 ? void 0 : firstCandidate.finishReason) === 'MALFORMED_FUNCTION_CALL') {
231
- throw new MalformedFunctionCallError(firstCandidate.finishMessage);
232
- }
233
- const parts = (_d = (_c = firstCandidate === null || firstCandidate === void 0 ? void 0 : firstCandidate.content) === null || _c === void 0 ? void 0 : _c.parts) !== null && _d !== void 0 ? _d : [];
276
+ const parts = (_g = (_f = firstCandidate === null || firstCandidate === void 0 ? void 0 : firstCandidate.content) === null || _f === void 0 ? void 0 : _f.parts) !== null && _g !== void 0 ? _g : [];
234
277
  const toolCalls = [];
235
278
  const thoughtParts = [];
236
279
  const textParts = [];
@@ -239,7 +282,7 @@ export class GeminiTransport {
239
282
  toolCalls.push({
240
283
  id: crypto.randomUUID(),
241
284
  name: part.functionCall.name,
242
- args: (_e = part.functionCall.args) !== null && _e !== void 0 ? _e : {},
285
+ args: (_h = part.functionCall.args) !== null && _h !== void 0 ? _h : {},
243
286
  });
244
287
  }
245
288
  else if (part.thought && part.text) {
@@ -249,26 +292,108 @@ export class GeminiTransport {
249
292
  textParts.push(part.text);
250
293
  }
251
294
  }
252
- // For tool-call responses, surface thought parts as `content` so the UI can
253
- // render them as a collapsible thinking block. Fall back to text narration
254
- // (textParts) for models that don't emit native thought parts (e.g. some Flash models)
255
- // but do narrate alongside tool calls when prompted to do so.
256
- // For final answers, surface only the response text.
295
+ // Gemini sometimes flags MALFORMED_FUNCTION_CALL when it produced a complete
296
+ // tool call but serialised it as a Python statement. Recover it in place
297
+ // when we can parse it with full confidence (its `parts` are empty in this
298
+ // case, so the repaired call flows through the normal token/cost handling
299
+ // below); otherwise throw so the ChatDriver retry takes over.
300
+ if ((firstCandidate === null || firstCandidate === void 0 ? void 0 : firstCandidate.finishReason) === 'MALFORMED_FUNCTION_CALL') {
301
+ const repaired = repairMalformedFunctionCall(firstCandidate.finishMessage, offeredToolNames);
302
+ if (!repaired) {
303
+ throw new MalformedFunctionCallError(firstCandidate.finishMessage);
304
+ }
305
+ toolCalls.push({ id: crypto.randomUUID(), name: repaired.name, args: repaired.args });
306
+ }
307
+ // The model's official thinking (thought-summary parts, returned because we
308
+ // set `thinkingConfig.includeThoughts`) is surfaced through the SAME channel
309
+ // as its tool-call narration: both go in `content`, which the driver tags as
310
+ // a toggleable "thinking" block. We combine them so neither is dropped.
311
+ // NOTE: for now real thinking and narration share one block; if the real
312
+ // thinking proves too verbose we may want to split it into its own channel.
313
+ // Final answers surface only the answer text — thought summaries are
314
+ // deliberately not promoted to be the answer.
315
+ const thinking = thoughtParts.join('');
316
+ const narration = textParts.join('');
257
317
  const base = toolCalls.length > 0
258
318
  ? {
259
319
  role: 'assistant',
260
- content: thoughtParts.join('') || textParts.join(''),
320
+ content: [thinking, narration].filter(Boolean).join('\n\n'),
261
321
  toolCalls,
262
322
  }
263
- : { role: 'assistant', content: textParts.join('') };
323
+ : { role: 'assistant', content: narration };
324
+ // Blank / non-STOP finishes are the recurring failure mode (especially empty
325
+ // STOP on 2.5 Pro). Log the full shape so the cause is legible from the
326
+ // breakdown rather than inferred. MALFORMED is excluded — it has its own
327
+ // repair/throw path above.
328
+ const finishReason = firstCandidate === null || firstCandidate === void 0 ? void 0 : firstCandidate.finishReason;
329
+ const isBlank = toolCalls.length === 0 && !thinking && !narration;
330
+ if (isBlank ||
331
+ (finishReason && finishReason !== 'STOP' && finishReason !== 'MALFORMED_FUNCTION_CALL')) {
332
+ this.logAbnormalResponse(response, {
333
+ finishReason,
334
+ functionCallParts: toolCalls.length,
335
+ thoughtParts: thoughtParts.length,
336
+ textParts: textParts.length,
337
+ });
338
+ }
264
339
  if (inputTokens != null)
265
340
  base.inputTokens = inputTokens;
266
341
  if (outputTokens != null)
267
342
  base.outputTokens = outputTokens;
268
343
  if (cost != null)
269
344
  base.cost = cost;
345
+ // Surface the provider diagnostic on the message so the driver can fold it
346
+ // into the debug-log meta events (the console log above is dev-only). Only
347
+ // when there's signal — a finish reason, thinking tokens, or a block reason.
348
+ const blockReason = (_j = response.promptFeedback) === null || _j === void 0 ? void 0 : _j.blockReason;
349
+ if (finishReason != null || thoughtsTokens != null || blockReason != null) {
350
+ base.responseMeta = {
351
+ finishReason,
352
+ thoughtsTokens,
353
+ parts: {
354
+ functionCall: toolCalls.length,
355
+ thought: thoughtParts.length,
356
+ text: textParts.length,
357
+ },
358
+ blockReason,
359
+ };
360
+ }
270
361
  return base;
271
362
  }
363
+ /**
364
+ * Log the full shape of a blank or non-STOP response so its cause is legible
365
+ * without re-deriving it: a thinking-only STOP (substantial `thoughtsTokenCount`,
366
+ * ~0 `candidatesTokenCount`) vs a content block (`SAFETY` / `RECITATION`) vs a
367
+ * token cap (`MAX_TOKENS`) vs a prompt-level block (top-level
368
+ * `promptFeedback.blockReason`). On 2.5 Pro — which always thinks — a blank
369
+ * STOP carrying substantial thought tokens is the "thought, then stopped
370
+ * without answering" signature.
371
+ */
372
+ logAbnormalResponse(response, parsed) {
373
+ var _a, _b, _c;
374
+ const usage = ((_a = response.usageMetadata) !== null && _a !== void 0 ? _a : {});
375
+ const candidate = (_c = (_b = response.candidates) === null || _b === void 0 ? void 0 : _b[0]) !== null && _c !== void 0 ? _c : {};
376
+ logger.warn('GeminiTransport: blank/non-STOP response', {
377
+ model: this.model,
378
+ finishReason: parsed.finishReason,
379
+ finishMessage: candidate.finishMessage,
380
+ parts: {
381
+ total: parsed.functionCallParts + parsed.thoughtParts + parsed.textParts,
382
+ functionCall: parsed.functionCallParts,
383
+ thought: parsed.thoughtParts,
384
+ text: parsed.textParts,
385
+ },
386
+ usage: {
387
+ promptTokenCount: usage.promptTokenCount,
388
+ candidatesTokenCount: usage.candidatesTokenCount,
389
+ thoughtsTokenCount: usage.thoughtsTokenCount,
390
+ totalTokenCount: usage.totalTokenCount,
391
+ },
392
+ promptFeedback: response.promptFeedback,
393
+ safetyRatings: candidate.safetyRatings,
394
+ citationMetadata: candidate.citationMetadata,
395
+ });
396
+ }
272
397
  buildEndpoint(body) {
273
398
  if (this.apiKey) {
274
399
  return {
@@ -0,0 +1,56 @@
1
/**
 * Normalized-temperature anchor. Values at or below it scale into
 * `[0, providerDefault]`; values above it scale into `[providerDefault, providerMax]`.
 * So a normalized `0.5` maps to each provider's own default temperature.
 */
const DEFAULT_ANCHOR = 0.5;
/**
 * Translate a provider-agnostic, normalized temperature into a provider's
 * native range, anchored on the provider's own default so the normalized scale
 * means the same thing everywhere:
 *
 * - `0` → `0` (fully deterministic),
 * - `0.5` → the provider's default temperature,
 * - `1` → the provider's maximum.
 *
 * Values between the anchors interpolate linearly within each half, so `< 0.5`
 * is "more focused than default" and `> 0.5` is "more random than default".
 * Where `defaultTemp` equals `maxTemp`, the upper half is flat.
 *
 * The input is clamped to `[0, 1]` before scaling. A `NaN` input (which
 * clamping alone would let through as `NaN`) is treated as the anchor, so the
 * result is the provider default rather than an unusable native temperature.
 *
 * @param normalized - Requested temperature on the normalized `0`–`1` scale.
 * @param range - `{ defaultTemp, maxTemp }`: the provider's native default and ceiling.
 * @returns The temperature expressed in the provider's native range.
 */
export function scaleTemperature(normalized, { defaultTemp, maxTemp }) {
    const clamped = Math.min(Math.max(normalized, 0), 1);
    // Math.min/Math.max propagate NaN, so guard after clamping.
    const t = Number.isNaN(clamped) ? DEFAULT_ANCHOR : clamped;
    if (t <= DEFAULT_ANCHOR) {
        return (t / DEFAULT_ANCHOR) * defaultTemp;
    }
    const upperFraction = (t - DEFAULT_ANCHOR) / (1 - DEFAULT_ANCHOR);
    return defaultTemp + upperFraction * (maxTemp - defaultTemp);
}
31
/**
 * Provider-agnostic, normalized sampling-temperature presets in `0`–`1` space —
 * named handles for the values most callers actually want, so intent reads
 * better than a bare magnitude. Each maps through `scaleTemperature` to the
 * active provider's native range, so the same preset means the same intent
 * whichever provider services the turn:
 *
 * - `ChatTemperature.Deterministic` (`0`) — greedy/argmax sampling.
 * - `ChatTemperature.Focused` (`0.25`) — low but not greedy; precise tool calls
 *   and extraction work where you still want a little slack.
 * - `ChatTemperature.Balanced` (`0.5`) — the provider's own default.
 * - `ChatTemperature.Creative` (`0.75`) — hotter than default, short of the ceiling.
 * - `ChatTemperature.Maximum` (`1`) — the hottest the active provider allows.
 *
 * (On a provider whose default equals its max — Anthropic — `Creative` and
 * `Maximum` coincide; see `scaleTemperature`.)
 *
 * The object is frozen so the shared presets cannot be altered at runtime,
 * matching the `readonly` members declared in the public typings.
 *
 * @beta
 */
export const ChatTemperature = Object.freeze({
    Deterministic: 0,
    Focused: 0.25,
    Balanced: 0.5,
    Creative: 0.75,
    Maximum: 1,
});
@@ -3282,6 +3282,34 @@
3282
3282
  "endIndex": 2
3283
3283
  }
3284
3284
  },
3285
+ {
3286
+ "kind": "PropertySignature",
3287
+ "canonicalReference": "@genesislcap/foundation-ai!ChatMessage#responseMeta:member",
3288
+ "docComment": "/**\n * Provider diagnostic for the request that produced this message — the raw finish reason plus, where the provider reports them, the reasoning-token count and a parts breakdown. Set by transports that expose it (Gemini); the driver folds it into the debug-log meta events (e.g. an empty-response `turn.retry`/`turn.error`) so a blank or truncated turn's cause is legible. Not shown to the user.\n */\n",
3289
+ "excerptTokens": [
3290
+ {
3291
+ "kind": "Content",
3292
+ "text": "responseMeta?: "
3293
+ },
3294
+ {
3295
+ "kind": "Reference",
3296
+ "text": "ChatResponseMeta",
3297
+ "canonicalReference": "@genesislcap/foundation-ai!ChatResponseMeta:interface"
3298
+ },
3299
+ {
3300
+ "kind": "Content",
3301
+ "text": ";"
3302
+ }
3303
+ ],
3304
+ "isReadonly": false,
3305
+ "isOptional": true,
3306
+ "releaseTag": "Beta",
3307
+ "name": "responseMeta",
3308
+ "propertyTypeTokenRange": {
3309
+ "startIndex": 1,
3310
+ "endIndex": 2
3311
+ }
3312
+ },
3285
3313
  {
3286
3314
  "kind": "PropertySignature",
3287
3315
  "canonicalReference": "@genesislcap/foundation-ai!ChatMessage#role:member",
@@ -3529,18 +3557,46 @@
3529
3557
  "endIndex": 2
3530
3558
  }
3531
3559
  },
3560
+ {
3561
+ "kind": "PropertySignature",
3562
+ "canonicalReference": "@genesislcap/foundation-ai!ChatRequestOptions#temperature:member",
3563
+ "docComment": "/**\n * Provider-agnostic sampling temperature, normalized to `0`–`1` and anchored on each provider's own default: `0` is fully deterministic, `0.5` is the provider's default, and `1` is the most random it allows. So `< 0.5` is \"more focused than default\" and `> 0.5` is \"more random than default\" on every provider, even though their native ranges differ (Anthropic `temperature` `0`–`1`, Gemini `generationConfig.temperature` `0`–`2`). Where a provider's default equals its max (Anthropic), the upper half is flat. Values outside `0`–`1` are clamped. Omit to use the provider/model default (equivalent to `0.5`). Prefer the `ChatTemperature` presets for common intents.\n *\n * @beta\n */\n",
3564
+ "excerptTokens": [
3565
+ {
3566
+ "kind": "Content",
3567
+ "text": "temperature?: "
3568
+ },
3569
+ {
3570
+ "kind": "Content",
3571
+ "text": "number"
3572
+ },
3573
+ {
3574
+ "kind": "Content",
3575
+ "text": ";"
3576
+ }
3577
+ ],
3578
+ "isReadonly": false,
3579
+ "isOptional": true,
3580
+ "releaseTag": "Beta",
3581
+ "name": "temperature",
3582
+ "propertyTypeTokenRange": {
3583
+ "startIndex": 1,
3584
+ "endIndex": 2
3585
+ }
3586
+ },
3532
3587
  {
3533
3588
  "kind": "PropertySignature",
3534
3589
  "canonicalReference": "@genesislcap/foundation-ai!ChatRequestOptions#toolChoice:member",
3535
- "docComment": "/**\n * Whether the model MAY call a tool (`'auto'`, the default when omitted) or MUST call one (`'required'`). `'required'` maps to Anthropic `tool_choice: { type: 'any' }` and Gemini `functionCallingConfig.mode: 'ANY'`. Used by sub-agent loops so a sub-agent can only end a turn by calling a tool (e.g. its completion tool), never by emitting a free-text answer.\n *\n * NOTE: `'required'` is incompatible with Anthropic extended/adaptive thinking a request must not enable both.\n *\n * @beta\n */\n",
3590
+ "docComment": "/**\n * Whether (and how) the model may call a tool this turn. Defaults to `'auto'` when omitted. Used by sub-agent loops (which force `'required'` so a turn can only end via a tool call) and configurable per agent / per agent state. See {@link ChatToolChoice}.\n *\n * @beta\n */\n",
3536
3591
  "excerptTokens": [
3537
3592
  {
3538
3593
  "kind": "Content",
3539
3594
  "text": "toolChoice?: "
3540
3595
  },
3541
3596
  {
3542
- "kind": "Content",
3543
- "text": "'auto' | 'required'"
3597
+ "kind": "Reference",
3598
+ "text": "ChatToolChoice",
3599
+ "canonicalReference": "@genesislcap/foundation-ai!ChatToolChoice:type"
3544
3600
  },
3545
3601
  {
3546
3602
  "kind": "Content",
@@ -3591,6 +3647,132 @@
3591
3647
  ],
3592
3648
  "extendsTokenRanges": []
3593
3649
  },
3650
+ {
3651
+ "kind": "Interface",
3652
+ "canonicalReference": "@genesislcap/foundation-ai!ChatResponseMeta:interface",
3653
+ "docComment": "/**\n * Provider-reported diagnostic for a single chat response, surfaced so the driver can attribute a blank/abnormal turn without re-deriving it. All fields are optional — a transport sets only what its provider reports.\n *\n * @beta\n */\n",
3654
+ "excerptTokens": [
3655
+ {
3656
+ "kind": "Content",
3657
+ "text": "export interface ChatResponseMeta "
3658
+ }
3659
+ ],
3660
+ "fileUrlPath": "src/types/chat.types.ts",
3661
+ "releaseTag": "Beta",
3662
+ "name": "ChatResponseMeta",
3663
+ "preserveMemberOrder": false,
3664
+ "members": [
3665
+ {
3666
+ "kind": "PropertySignature",
3667
+ "canonicalReference": "@genesislcap/foundation-ai!ChatResponseMeta#blockReason:member",
3668
+ "docComment": "/**\n * Provider block reason when the prompt/response was blocked rather than generated.\n */\n",
3669
+ "excerptTokens": [
3670
+ {
3671
+ "kind": "Content",
3672
+ "text": "blockReason?: "
3673
+ },
3674
+ {
3675
+ "kind": "Content",
3676
+ "text": "string"
3677
+ },
3678
+ {
3679
+ "kind": "Content",
3680
+ "text": ";"
3681
+ }
3682
+ ],
3683
+ "isReadonly": false,
3684
+ "isOptional": true,
3685
+ "releaseTag": "Beta",
3686
+ "name": "blockReason",
3687
+ "propertyTypeTokenRange": {
3688
+ "startIndex": 1,
3689
+ "endIndex": 2
3690
+ }
3691
+ },
3692
+ {
3693
+ "kind": "PropertySignature",
3694
+ "canonicalReference": "@genesislcap/foundation-ai!ChatResponseMeta#finishReason:member",
3695
+ "docComment": "/**\n * Raw provider finish reason for the turn, verbatim — e.g. Gemini `'STOP'` | `'MAX_TOKENS'` | `'SAFETY'` | `'RECITATION'`. The key signal when triaging a blank or truncated turn.\n */\n",
3696
+ "excerptTokens": [
3697
+ {
3698
+ "kind": "Content",
3699
+ "text": "finishReason?: "
3700
+ },
3701
+ {
3702
+ "kind": "Content",
3703
+ "text": "string"
3704
+ },
3705
+ {
3706
+ "kind": "Content",
3707
+ "text": ";"
3708
+ }
3709
+ ],
3710
+ "isReadonly": false,
3711
+ "isOptional": true,
3712
+ "releaseTag": "Beta",
3713
+ "name": "finishReason",
3714
+ "propertyTypeTokenRange": {
3715
+ "startIndex": 1,
3716
+ "endIndex": 2
3717
+ }
3718
+ },
3719
+ {
3720
+ "kind": "PropertySignature",
3721
+ "canonicalReference": "@genesislcap/foundation-ai!ChatResponseMeta#parts:member",
3722
+ "docComment": "/**\n * Count of response parts by kind — distinguishes a truly empty turn from a thinking-only one.\n */\n",
3723
+ "excerptTokens": [
3724
+ {
3725
+ "kind": "Content",
3726
+ "text": "parts?: "
3727
+ },
3728
+ {
3729
+ "kind": "Content",
3730
+ "text": "{\n functionCall: number;\n thought: number;\n text: number;\n }"
3731
+ },
3732
+ {
3733
+ "kind": "Content",
3734
+ "text": ";"
3735
+ }
3736
+ ],
3737
+ "isReadonly": false,
3738
+ "isOptional": true,
3739
+ "releaseTag": "Beta",
3740
+ "name": "parts",
3741
+ "propertyTypeTokenRange": {
3742
+ "startIndex": 1,
3743
+ "endIndex": 2
3744
+ }
3745
+ },
3746
+ {
3747
+ "kind": "PropertySignature",
3748
+ "canonicalReference": "@genesislcap/foundation-ai!ChatResponseMeta#thoughtsTokens:member",
3749
+ "docComment": "/**\n * Reasoning (\"thinking\") tokens billed for this turn, when the provider reports them (Gemini 2.5). A high count alongside ~0 output tokens and a `'STOP'` finish is the \"thought, then stopped without answering\" signature.\n */\n",
3750
+ "excerptTokens": [
3751
+ {
3752
+ "kind": "Content",
3753
+ "text": "thoughtsTokens?: "
3754
+ },
3755
+ {
3756
+ "kind": "Content",
3757
+ "text": "number"
3758
+ },
3759
+ {
3760
+ "kind": "Content",
3761
+ "text": ";"
3762
+ }
3763
+ ],
3764
+ "isReadonly": false,
3765
+ "isOptional": true,
3766
+ "releaseTag": "Beta",
3767
+ "name": "thoughtsTokens",
3768
+ "propertyTypeTokenRange": {
3769
+ "startIndex": 1,
3770
+ "endIndex": 2
3771
+ }
3772
+ }
3773
+ ],
3774
+ "extendsTokenRanges": []
3775
+ },
3594
3776
  {
3595
3777
  "kind": "TypeAlias",
3596
3778
  "canonicalReference": "@genesislcap/foundation-ai!ChatRole:type",
@@ -3747,6 +3929,29 @@
3747
3929
  "endIndex": 2
3748
3930
  }
3749
3931
  },
3932
+ {
3933
+ "kind": "Variable",
3934
+ "canonicalReference": "@genesislcap/foundation-ai!ChatTemperature:var",
3935
+ "docComment": "/**\n * Provider-agnostic, normalized sampling-temperature presets in `0`–`1` space — named handles for the values most callers actually want, so intent reads better than a bare magnitude. Each maps through `scaleTemperature` to the active provider's native range, so the same preset means the same intent whichever provider services the turn:\n *\n * - `ChatTemperature.Deterministic` (`0`) — greedy/argmax sampling. - `ChatTemperature.Focused` (`0.25`) — low but not greedy; precise tool calls and extraction work where you still want a little slack. - `ChatTemperature.Balanced` (`0.5`) — the provider's own default. - `ChatTemperature.Creative` (`0.75`) — hotter than default, short of the ceiling. - `ChatTemperature.Maximum` (`1`) — the hottest the active provider allows.\n *\n * (On a provider whose default equals its max — Anthropic — `Creative` and `Maximum` coincide; see `scaleTemperature`.)\n *\n * @beta\n */\n",
3936
+ "excerptTokens": [
3937
+ {
3938
+ "kind": "Content",
3939
+ "text": "ChatTemperature: "
3940
+ },
3941
+ {
3942
+ "kind": "Content",
3943
+ "text": "{\n readonly Deterministic: 0;\n readonly Focused: 0.25;\n readonly Balanced: 0.5;\n readonly Creative: 0.75;\n readonly Maximum: 1;\n}"
3944
+ }
3945
+ ],
3946
+ "fileUrlPath": "src/utils/temperature.ts",
3947
+ "isReadonly": true,
3948
+ "releaseTag": "Beta",
3949
+ "name": "ChatTemperature",
3950
+ "variableTypeTokenRange": {
3951
+ "startIndex": 1,
3952
+ "endIndex": 2
3953
+ }
3954
+ },
3750
3955
  {
3751
3956
  "kind": "TypeAlias",
3752
3957
  "canonicalReference": "@genesislcap/foundation-ai!ChatToolCall:type",
@@ -3814,6 +4019,32 @@
3814
4019
  "endIndex": 3
3815
4020
  }
3816
4021
  },
4022
+ {
4023
+ "kind": "TypeAlias",
4024
+ "canonicalReference": "@genesislcap/foundation-ai!ChatToolChoice:type",
4025
+ "docComment": "/**\n * Controls whether (and how) the model may call a tool on a given turn. Maps to each provider's \"tool choice\" / \"function calling mode\" control:\n *\n * - `'auto'` (the default when omitted) — the model decides whether to call a tool or answer with text. Anthropic leaves `tool_choice` unset; Gemini leaves `functionCallingConfig` unset (`AUTO`). - `'required'` — the model MUST call one of the available tools. Maps to Anthropic `tool_choice: { type: 'any' }` and Gemini `functionCallingConfig.mode: 'ANY'`. - `'none'` — the model MUST NOT call a tool (text answer only). Maps to Anthropic `tool_choice: { type: 'none' }` and Gemini `functionCallingConfig.mode: 'NONE'`. - `{ tool: name }` — the model MUST call exactly the named tool. Maps to Anthropic `tool_choice: { type: 'tool', name }` and Gemini `functionCallingConfig.mode: 'ANY', allowedFunctionNames: [name]`. Use this for surgical forcing at a single-tool juncture (e.g. force a classifier tool in an intake step) while leaving `'auto'` everywhere multi-step work happens.\n *\n * Forcing (`'required'` / `{ tool }`) is a no-op when no tools are advertised.\n *\n * NOTE: forcing is incompatible with Anthropic extended/adaptive thinking — a request must not enable both.\n *\n * @beta\n */\n",
4026
+ "excerptTokens": [
4027
+ {
4028
+ "kind": "Content",
4029
+ "text": "export type ChatToolChoice = "
4030
+ },
4031
+ {
4032
+ "kind": "Content",
4033
+ "text": "'auto' | 'required' | 'none' | {\n tool: string;\n}"
4034
+ },
4035
+ {
4036
+ "kind": "Content",
4037
+ "text": ";"
4038
+ }
4039
+ ],
4040
+ "fileUrlPath": "src/types/chat.types.ts",
4041
+ "releaseTag": "Beta",
4042
+ "name": "ChatToolChoice",
4043
+ "typeTokenRange": {
4044
+ "startIndex": 1,
4045
+ "endIndex": 2
4046
+ }
4047
+ },
3817
4048
  {
3818
4049
  "kind": "Interface",
3819
4050
  "canonicalReference": "@genesislcap/foundation-ai!ChatToolDefinition:interface",