npm - @genesislcap/foundation-ai - Versions diffs - 14.454.2 → 14.455.1 - Mend

@genesislcap/foundation-ai 14.454.2 → 14.455.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

package/dist/dts/index.d.ts +2 -1
package/dist/dts/index.d.ts.map +1 -1
package/dist/dts/transports/anthropic-transport.d.ts.map +1 -1
package/dist/dts/transports/gemini-malformed-call.d.ts +17 -0
package/dist/dts/transports/gemini-malformed-call.d.ts.map +1 -0
package/dist/dts/transports/gemini-transport.d.ts +10 -0
package/dist/dts/transports/gemini-transport.d.ts.map +1 -1
package/dist/dts/types/chat.types.d.ts +87 -9
package/dist/dts/types/chat.types.d.ts.map +1 -1
package/dist/dts/utils/temperature.d.ts +49 -0
package/dist/dts/utils/temperature.d.ts.map +1 -0
package/dist/esm/index.js +1 -0
package/dist/esm/transports/anthropic-transport.js +32 -5
package/dist/esm/transports/gemini-malformed-call.js +242 -0
package/dist/esm/transports/gemini-transport.js +156 -31
package/dist/esm/utils/temperature.js +56 -0
package/dist/foundation-ai.api.json +234 -3
package/dist/foundation-ai.d.ts +126 -9
package/package.json +11 -11

package/dist/esm/transports/gemini-transport.js CHANGED

@@ -2,6 +2,8 @@ import { __awaiter } from "tslib";
 import { SUPPORTED_GEMINI_MODEL_IDS, } from '../types';
 import { combineSignals } from '../utils/combine-signals';
 import { logger } from '../utils/logger';
+import { scaleTemperature } from '../utils/temperature';
+import { repairMalformedFunctionCall } from './gemini-malformed-call';
 const GEMINI_DIRECT_URL = (model) => `https://generativelanguage.googleapis.com/v1beta/models/${model}:generateContent`;
 const DEFAULT_MODEL = 'gemini-2.5-flash-lite';
 const DEFAULT_TIMEOUT = 180000; // 3 minutes
@@ -14,6 +16,22 @@ const GEMINI_CONTEXT_LIMITS = {
     'gemini-2.5-flash': 1048576,
     'gemini-2.5-flash-lite': 1048576,
 };
+/** Gemini's native default + ceiling temperature — https://ai.google.dev/gemini-api/docs */
+const GEMINI_DEFAULT_TEMPERATURE = 1;
+const GEMINI_MAX_TEMPERATURE = 2;
+/**
+ * Map a provider-agnostic {@link ChatToolChoice} to Gemini's `toolConfig`.
+ * Returns undefined for the default (`'auto'`) so the request omits the field.
+ */
+function toGeminiToolConfig(choice) {
+    if (!choice || choice === 'auto')
+        return undefined;
+    if (choice === 'required')
+        return { functionCallingConfig: { mode: 'ANY' } };
+    if (choice === 'none')
+        return { functionCallingConfig: { mode: 'NONE' } };
+    return { functionCallingConfig: { mode: 'ANY', allowedFunctionNames: [choice.tool] } };
+}
 function assertSupportedGeminiModel(model) {
     if (!SUPPORTED_GEMINI_MODEL_IDS.includes(model)) {
         throw new Error(`GeminiTransport: unsupported model "${model}". Use one of: ${SUPPORTED_GEMINI_MODEL_IDS.join(', ')}.`);
@@ -110,7 +128,7 @@ export class GeminiTransport {
     // ── ChatTransport (multi-turn chat) ────────────────────────────────────
     sendChatMessage(history, userMessage, options) {
         return __awaiter(this, void 0, void 0, function* () {
-            var _a;
+            var _a, _b;
             const contents = this.toGeminiContents(history, userMessage, options === null || options === void 0 ? void 0 : options.attachments);
             const tools = ((_a = options === null || options === void 0 ? void 0 : options.tools) === null || _a === void 0 ? void 0 : _a.length)
                 ? [
@@ -126,16 +144,32 @@ export class GeminiTransport {
             const systemInstruction = (options === null || options === void 0 ? void 0 : options.systemPrompt)
                 ? { role: 'system', parts: [{ text: options.systemPrompt }] }
                 : undefined;
-            // `toolChoice: 'required'` forces a function call (`mode: 'ANY'`) instead of
-            // a text answer — used by sub-agent loops so the turn can only end via a
-            // tool call. Only meaningful when functions are declared. Forcing ANY can
-            // make the model batch calls in Python-ish syntax → `MALFORMED_FUNCTION_CALL`,
-            // which `fromGeminiResponse` already detects and the caller retries.
-            const toolConfig = (options === null || options === void 0 ? void 0 : options.toolChoice) === 'required' && tools
-                ? { functionCallingConfig: { mode: 'ANY' } }
-                : undefined;
-            const response = yield this.post({ model: this.model, contents, tools, systemInstruction, toolConfig }, options === null || options === void 0 ? void 0 : options.signal);
-            return this.fromGeminiResponse(response);
+            // Map the requested tool-call mode to Gemini's toolConfig — only meaningful
+            // when functions are declared. NOTE: forcing (`ANY` / allowedFunctionNames)
+            // can make the model batch calls in Python-ish syntax →
+            // `MALFORMED_FUNCTION_CALL`, which `fromGeminiResponse` repairs in place when
+            // it can confidently parse the call (see `repairMalformedFunctionCall`),
+            // falling back to the caller's retry otherwise.
+            const toolConfig = tools ? toGeminiToolConfig(options === null || options === void 0 ? void 0 : options.toolChoice) : undefined;
+            // Request thought summaries so the model's official reasoning is returned
+            // (and a thinking-only turn surfaces *something* rather than going silently
+            // blank). Gemini 2.5 Pro always thinks regardless, so this only changes what
+            // is RETURNED, not what is billed — see `logTokenUsage` for the thinking-token
+            // accounting it lets us finally capture.
+            const generationConfig = { thinkingConfig: { includeThoughts: true } };
+            // Normalized [0,1] temperature → Gemini's native range, anchored so 0.5
+            // maps to its default (native 1) and 1 to its ceiling (native 2).
+            if ((options === null || options === void 0 ? void 0 : options.temperature) != null) {
+                generationConfig.temperature = scaleTemperature(options.temperature, {
+                    defaultTemp: GEMINI_DEFAULT_TEMPERATURE,
+                    maxTemp: GEMINI_MAX_TEMPERATURE,
+                });
+            }
+            // Names of the tools offered this turn — used to validate a repaired
+            // malformed call against the real tool surface before accepting it.
+            const offeredToolNames = new Set(((_b = options === null || options === void 0 ? void 0 : options.tools) !== null && _b !== void 0 ? _b : []).map((t) => t.name));
+            const response = yield this.post({ model: this.model, contents, tools, systemInstruction, toolConfig, generationConfig }, options === null || options === void 0 ? void 0 : options.signal);
+            return this.fromGeminiResponse(response, offeredToolNames);
         });
     }
     /**
@@ -143,15 +177,19 @@ export class GeminiTransport {
      * and returns the per-call total so the caller can attach it to the response
      * message.
      */
-    logTokenUsage(promptTokens, candidateTokens) {
+    logTokenUsage(promptTokens, candidateTokens, thoughtTokens) {
         const { promptPerMillion, candidatePerMillion } = estimatedGeminiPaidRatesUsdPerMillion(this.model);
         const promptCost = (promptTokens / GeminiTransport.TOKENS_PER_MILLION) * promptPerMillion;
-        const candidateCost = (candidateTokens / GeminiTransport.TOKENS_PER_MILLION) * candidatePerMillion;
+        // Thinking tokens are billed at the output (candidate) rate. They are
+        // incurred whenever the model thinks (always, on 2.5 Pro) — counting them
+        // here corrects a long-standing undercount, it does not raise the bill.
+        const candidateCost = ((candidateTokens + thoughtTokens) / GeminiTransport.TOKENS_PER_MILLION) *
+            candidatePerMillion;
         const totalCost = promptCost + candidateCost;
         this.lifetimeCostUsd += totalCost;
         console.log(`--- Gemini Token Usage (${this.model}) ---`);
         console.log(`Prompt Tokens:    ${promptTokens} ($${promptCost.toFixed(GeminiTransport.COST_DECIMAL_PLACES)})`);
-        console.log(`Candidate Tokens: ${candidateTokens} ($${candidateCost.toFixed(GeminiTransport.COST_DECIMAL_PLACES)})`);
+        console.log(`Candidate Tokens: ${candidateTokens} (+${thoughtTokens} thinking) ($${candidateCost.toFixed(GeminiTransport.COST_DECIMAL_PLACES)})`);
         console.log(`Total Cost:       $${totalCost.toFixed(GeminiTransport.COST_DECIMAL_PLACES)}`);
         console.log(`Lifetime Cost:    $${this.lifetimeCostUsd.toFixed(GeminiTransport.COST_DECIMAL_PLACES)}`);
         console.log('--------------------------');
@@ -210,27 +248,32 @@ export class GeminiTransport {
         }
         return contents;
     }
-    fromGeminiResponse(response) {
-        var _a, _b, _c, _d, _e;
+    fromGeminiResponse(response, offeredToolNames = new Set()) {
+        var _a, _b, _c, _d, _e, _f, _g, _h, _j;
         let inputTokens;
         let outputTokens;
+        let thoughtsTokens;
         let cost;
         if (response.usageMetadata) {
             const usage = response.usageMetadata;
-            cost = this.logTokenUsage((_a = usage.promptTokenCount) !== null && _a !== void 0 ? _a : 0, (_b = usage.candidatesTokenCount) !== null && _b !== void 0 ? _b : 0);
+            cost = this.logTokenUsage((_a = usage.promptTokenCount) !== null && _a !== void 0 ? _a : 0, (_b = usage.candidatesTokenCount) !== null && _b !== void 0 ? _b : 0, (_c = usage.thoughtsTokenCount) !== null && _c !== void 0 ? _c : 0);
             if (usage.promptTokenCount != null) {
                 inputTokens = usage.promptTokenCount;
             }
-            if (usage.candidatesTokenCount != null) {
-                outputTokens = usage.candidatesTokenCount;
+            if (usage.thoughtsTokenCount != null) {
+                thoughtsTokens = usage.thoughtsTokenCount;
+            }
+            if (usage.candidatesTokenCount != null || usage.thoughtsTokenCount != null) {
+                // Thinking tokens are generated output, billed at the candidate rate, but
+                // reported in a field disjoint from `candidatesTokenCount`. Fold them into
+                // the provider-agnostic `outputTokens` so it reflects the true generated
+                // total — matching Anthropic, whose `output_tokens` already includes them.
+                outputTokens = ((_d = usage.candidatesTokenCount) !== null && _d !== void 0 ? _d : 0) + ((_e = usage.thoughtsTokenCount) !== null && _e !== void 0 ? _e : 0);
             }
         }
         const candidates = response === null || response === void 0 ? void 0 : response.candidates;
         const firstCandidate = candidates === null || candidates === void 0 ? void 0 : candidates[0];
-        if ((firstCandidate === null || firstCandidate === void 0 ? void 0 : firstCandidate.finishReason) === 'MALFORMED_FUNCTION_CALL') {
-            throw new MalformedFunctionCallError(firstCandidate.finishMessage);
-        }
-        const parts = (_d = (_c = firstCandidate === null || firstCandidate === void 0 ? void 0 : firstCandidate.content) === null || _c === void 0 ? void 0 : _c.parts) !== null && _d !== void 0 ? _d : [];
+        const parts = (_g = (_f = firstCandidate === null || firstCandidate === void 0 ? void 0 : firstCandidate.content) === null || _f === void 0 ? void 0 : _f.parts) !== null && _g !== void 0 ? _g : [];
         const toolCalls = [];
         const thoughtParts = [];
         const textParts = [];
@@ -239,7 +282,7 @@ export class GeminiTransport {
                 toolCalls.push({
                     id: crypto.randomUUID(),
                     name: part.functionCall.name,
-                    args: (_e = part.functionCall.args) !== null && _e !== void 0 ? _e : {},
+                    args: (_h = part.functionCall.args) !== null && _h !== void 0 ? _h : {},
                 });
             }
             else if (part.thought && part.text) {
@@ -249,26 +292,108 @@ export class GeminiTransport {
                 textParts.push(part.text);
             }
         }
-        // For tool-call responses, surface thought parts as `content` so the UI can
-        // render them as a collapsible thinking block. Fall back to text narration
-        // (textParts) for models that don't emit native thought parts (e.g. some Flash models)
-        // but do narrate alongside tool calls when prompted to do so.
-        // For final answers, surface only the response text.
+        // Gemini sometimes flags MALFORMED_FUNCTION_CALL when it produced a complete
+        // tool call but serialised it as a Python statement. Recover it in place
+        // when we can parse it with full confidence (its `parts` are empty in this
+        // case, so the repaired call flows through the normal token/cost handling
+        // below); otherwise throw so the ChatDriver retry takes over.
+        if ((firstCandidate === null || firstCandidate === void 0 ? void 0 : firstCandidate.finishReason) === 'MALFORMED_FUNCTION_CALL') {
+            const repaired = repairMalformedFunctionCall(firstCandidate.finishMessage, offeredToolNames);
+            if (!repaired) {
+                throw new MalformedFunctionCallError(firstCandidate.finishMessage);
+            }
+            toolCalls.push({ id: crypto.randomUUID(), name: repaired.name, args: repaired.args });
+        }
+        // The model's official thinking (thought-summary parts, returned because we
+        // set `thinkingConfig.includeThoughts`) is surfaced through the SAME channel
+        // as its tool-call narration: both go in `content`, which the driver tags as
+        // a toggleable "thinking" block. We combine them so neither is dropped.
+        // NOTE: for now real thinking and narration share one block; if the real
+        // thinking proves too verbose we may want to split it into its own channel.
+        // Final answers surface only the answer text — thought summaries are
+        // deliberately not promoted to be the answer.
+        const thinking = thoughtParts.join('');
+        const narration = textParts.join('');
         const base = toolCalls.length > 0
             ? {
                 role: 'assistant',
-                content: thoughtParts.join('') || textParts.join(''),
+                content: [thinking, narration].filter(Boolean).join('\n\n'),
                 toolCalls,
             }
-            : { role: 'assistant', content: textParts.join('') };
+            : { role: 'assistant', content: narration };
+        // Blank / non-STOP finishes are the recurring failure mode (especially empty
+        // STOP on 2.5 Pro). Log the full shape so the cause is legible from the
+        // breakdown rather than inferred. MALFORMED is excluded — it has its own
+        // repair/throw path above.
+        const finishReason = firstCandidate === null || firstCandidate === void 0 ? void 0 : firstCandidate.finishReason;
+        const isBlank = toolCalls.length === 0 && !thinking && !narration;
+        if (isBlank ||
+            (finishReason && finishReason !== 'STOP' && finishReason !== 'MALFORMED_FUNCTION_CALL')) {
+            this.logAbnormalResponse(response, {
+                finishReason,
+                functionCallParts: toolCalls.length,
+                thoughtParts: thoughtParts.length,
+                textParts: textParts.length,
+            });
+        }
         if (inputTokens != null)
             base.inputTokens = inputTokens;
         if (outputTokens != null)
             base.outputTokens = outputTokens;
         if (cost != null)
             base.cost = cost;
+        // Surface the provider diagnostic on the message so the driver can fold it
+        // into the debug-log meta events (the console log above is dev-only). Only
+        // when there's signal — a finish reason, thinking tokens, or a block reason.
+        const blockReason = (_j = response.promptFeedback) === null || _j === void 0 ? void 0 : _j.blockReason;
+        if (finishReason != null || thoughtsTokens != null || blockReason != null) {
+            base.responseMeta = {
+                finishReason,
+                thoughtsTokens,
+                parts: {
+                    functionCall: toolCalls.length,
+                    thought: thoughtParts.length,
+                    text: textParts.length,
+                },
+                blockReason,
+            };
+        }
         return base;
     }
+    /**
+     * Log the full shape of a blank or non-STOP response so its cause is legible
+     * without re-deriving it: a thinking-only STOP (substantial `thoughtsTokenCount`,
+     * ~0 `candidatesTokenCount`) vs a content block (`SAFETY` / `RECITATION`) vs a
+     * token cap (`MAX_TOKENS`) vs a prompt-level block (top-level
+     * `promptFeedback.blockReason`). On 2.5 Pro — which always thinks — a blank
+     * STOP carrying substantial thought tokens is the "thought, then stopped
+     * without answering" signature.
+     */
+    logAbnormalResponse(response, parsed) {
+        var _a, _b, _c;
+        const usage = ((_a = response.usageMetadata) !== null && _a !== void 0 ? _a : {});
+        const candidate = (_c = (_b = response.candidates) === null || _b === void 0 ? void 0 : _b[0]) !== null && _c !== void 0 ? _c : {};
+        logger.warn('GeminiTransport: blank/non-STOP response', {
+            model: this.model,
+            finishReason: parsed.finishReason,
+            finishMessage: candidate.finishMessage,
+            parts: {
+                total: parsed.functionCallParts + parsed.thoughtParts + parsed.textParts,
+                functionCall: parsed.functionCallParts,
+                thought: parsed.thoughtParts,
+                text: parsed.textParts,
+            },
+            usage: {
+                promptTokenCount: usage.promptTokenCount,
+                candidatesTokenCount: usage.candidatesTokenCount,
+                thoughtsTokenCount: usage.thoughtsTokenCount,
+                totalTokenCount: usage.totalTokenCount,
+            },
+            promptFeedback: response.promptFeedback,
+            safetyRatings: candidate.safetyRatings,
+            citationMetadata: candidate.citationMetadata,
+        });
+    }
     buildEndpoint(body) {
         if (this.apiKey) {
             return {

package/dist/esm/utils/temperature.js ADDED

@@ -0,0 +1,56 @@
+/**
+ * Normalized-temperature anchor. Values at or below it scale into
+ * `[0, providerDefault]`; values above it scale into `[providerDefault, providerMax]`.
+ * So a normalized `0.5` maps to each provider's own default temperature.
+ */
+const DEFAULT_ANCHOR = 0.5;
+/**
+ * Translate a provider-agnostic, normalized temperature into a provider's
+ * native range, anchored on the provider's own default so the normalized scale
+ * means the same thing everywhere:
+ *
+ * - `0` → `0` (fully deterministic),
+ * - `0.5` → the provider's default temperature,
+ * - `1` → the provider's maximum.
+ *
+ * Values between the anchors interpolate linearly within each half, so `< 0.5`
+ * is "more focused than default" and `> 0.5` is "more random than default" on
+ * every provider — even though the providers' native ranges differ (Anthropic
+ * default/max `1`/`1`, Gemini `1`/`2`). Where a provider's default equals its
+ * max (Anthropic), the upper half is flat: it cannot go hotter than its default.
+ *
+ * The input is clamped to `[0, 1]` before scaling, so an out-of-range value can
+ * never produce a native temperature the provider would reject.
+ */
+export function scaleTemperature(normalized, { defaultTemp, maxTemp }) {
+    const t = Math.min(Math.max(normalized, 0), 1);
+    return t <= DEFAULT_ANCHOR
+        ? (t / DEFAULT_ANCHOR) * defaultTemp
+        : defaultTemp + ((t - DEFAULT_ANCHOR) / (1 - DEFAULT_ANCHOR)) * (maxTemp - defaultTemp);
+}
+/**
+ * Provider-agnostic, normalized sampling-temperature presets in `0`–`1` space —
+ * named handles for the values most callers actually want, so intent reads
+ * better than a bare magnitude. Each maps through `scaleTemperature` to the
+ * active provider's native range, so the same preset means the same intent
+ * whichever provider services the turn:
+ *
+ * - `ChatTemperature.Deterministic` (`0`) — greedy/argmax sampling.
+ * - `ChatTemperature.Focused` (`0.25`) — low but not greedy; precise tool calls
+ *   and extraction work where you still want a little slack.
+ * - `ChatTemperature.Balanced` (`0.5`) — the provider's own default.
+ * - `ChatTemperature.Creative` (`0.75`) — hotter than default, short of the ceiling.
+ * - `ChatTemperature.Maximum` (`1`) — the hottest the active provider allows.
+ *
+ * (On a provider whose default equals its max — Anthropic — `Creative` and
+ * `Maximum` coincide; see `scaleTemperature`.)
+ *
+ * @beta
+ */
+export const ChatTemperature = {
+    Deterministic: 0,
+    Focused: 0.25,
+    Balanced: 0.5,
+    Creative: 0.75,
+    Maximum: 1,
+};

package/dist/foundation-ai.api.json CHANGED

@@ -3282,6 +3282,34 @@
                 "endIndex": 2
               }
             },
+            {
+              "kind": "PropertySignature",
+              "canonicalReference": "@genesislcap/foundation-ai!ChatMessage#responseMeta:member",
+              "docComment": "/**\n * Provider diagnostic for the request that produced this message — the raw finish reason plus, where the provider reports them, the reasoning-token count and a parts breakdown. Set by transports that expose it (Gemini); the driver folds it into the debug-log meta events (e.g. an empty-response `turn.retry`/`turn.error`) so a blank or truncated turn's cause is legible. Not shown to the user.\n */\n",
+              "excerptTokens": [
+                {
+                  "kind": "Content",
+                  "text": "responseMeta?: "
+                },
+                {
+                  "kind": "Reference",
+                  "text": "ChatResponseMeta",
+                  "canonicalReference": "@genesislcap/foundation-ai!ChatResponseMeta:interface"
+                },
+                {
+                  "kind": "Content",
+                  "text": ";"
+                }
+              ],
+              "isReadonly": false,
+              "isOptional": true,
+              "releaseTag": "Beta",
+              "name": "responseMeta",
+              "propertyTypeTokenRange": {
+                "startIndex": 1,
+                "endIndex": 2
+              }
+            },
             {
               "kind": "PropertySignature",
               "canonicalReference": "@genesislcap/foundation-ai!ChatMessage#role:member",
@@ -3529,18 +3557,46 @@
                 "endIndex": 2
               }
             },
+            {
+              "kind": "PropertySignature",
+              "canonicalReference": "@genesislcap/foundation-ai!ChatRequestOptions#temperature:member",
+              "docComment": "/**\n * Provider-agnostic sampling temperature, normalized to `0`–`1` and anchored on each provider's own default: `0` is fully deterministic, `0.5` is the provider's default, and `1` is the most random it allows. So `< 0.5` is \"more focused than default\" and `> 0.5` is \"more random than default\" on every provider, even though their native ranges differ (Anthropic `temperature` `0`–`1`, Gemini `generationConfig.temperature` `0`–`2`). Where a provider's default equals its max (Anthropic), the upper half is flat. Values outside `0`–`1` are clamped. Omit to use the provider/model default (equivalent to `0.5`). Prefer the `ChatTemperature` presets for common intents.\n *\n * @beta\n */\n",
+              "excerptTokens": [
+                {
+                  "kind": "Content",
+                  "text": "temperature?: "
+                },
+                {
+                  "kind": "Content",
+                  "text": "number"
+                },
+                {
+                  "kind": "Content",
+                  "text": ";"
+                }
+              ],
+              "isReadonly": false,
+              "isOptional": true,
+              "releaseTag": "Beta",
+              "name": "temperature",
+              "propertyTypeTokenRange": {
+                "startIndex": 1,
+                "endIndex": 2
+              }
+            },
             {
               "kind": "PropertySignature",
               "canonicalReference": "@genesislcap/foundation-ai!ChatRequestOptions#toolChoice:member",
-              "docComment": "/**\n * Whether the model MAY call a tool (`'auto'`, the default when omitted) or MUST call one (`'required'`). `'required'` maps to Anthropic `tool_choice: { type: 'any' }` and Gemini `functionCallingConfig.mode: 'ANY'`. Used by sub-agent loops so a sub-agent can only end a turn by calling a tool (e.g. its completion tool), never by emitting a free-text answer.\n *\n * NOTE: `'required'` is incompatible with Anthropic extended/adaptive thinking — a request must not enable both.\n *\n * @beta\n */\n",
+              "docComment": "/**\n * Whether (and how) the model may call a tool this turn. Defaults to `'auto'` when omitted. Used by sub-agent loops (which force `'required'` so a turn can only end via a tool call) and configurable per agent / per agent state. See {@link ChatToolChoice}.\n *\n * @beta\n */\n",
               "excerptTokens": [
                 {
                   "kind": "Content",
                   "text": "toolChoice?: "
                 },
                 {
-                  "kind": "Content",
-                  "text": "'auto' | 'required'"
+                  "kind": "Reference",
+                  "text": "ChatToolChoice",
+                  "canonicalReference": "@genesislcap/foundation-ai!ChatToolChoice:type"
                 },
                 {
                   "kind": "Content",
@@ -3591,6 +3647,132 @@
           ],
           "extendsTokenRanges": []
         },
+        {
+          "kind": "Interface",
+          "canonicalReference": "@genesislcap/foundation-ai!ChatResponseMeta:interface",
+          "docComment": "/**\n * Provider-reported diagnostic for a single chat response, surfaced so the driver can attribute a blank/abnormal turn without re-deriving it. All fields are optional — a transport sets only what its provider reports.\n *\n * @beta\n */\n",
+          "excerptTokens": [
+            {
+              "kind": "Content",
+              "text": "export interface ChatResponseMeta "
+            }
+          ],
+          "fileUrlPath": "src/types/chat.types.ts",
+          "releaseTag": "Beta",
+          "name": "ChatResponseMeta",
+          "preserveMemberOrder": false,
+          "members": [
+            {
+              "kind": "PropertySignature",
+              "canonicalReference": "@genesislcap/foundation-ai!ChatResponseMeta#blockReason:member",
+              "docComment": "/**\n * Provider block reason when the prompt/response was blocked rather than generated.\n */\n",
+              "excerptTokens": [
+                {
+                  "kind": "Content",
+                  "text": "blockReason?: "
+                },
+                {
+                  "kind": "Content",
+                  "text": "string"
+                },
+                {
+                  "kind": "Content",
+                  "text": ";"
+                }
+              ],
+              "isReadonly": false,
+              "isOptional": true,
+              "releaseTag": "Beta",
+              "name": "blockReason",
+              "propertyTypeTokenRange": {
+                "startIndex": 1,
+                "endIndex": 2
+              }
+            },
+            {
+              "kind": "PropertySignature",
+              "canonicalReference": "@genesislcap/foundation-ai!ChatResponseMeta#finishReason:member",
+              "docComment": "/**\n * Raw provider finish reason for the turn, verbatim — e.g. Gemini `'STOP'` | `'MAX_TOKENS'` | `'SAFETY'` | `'RECITATION'`. The key signal when triaging a blank or truncated turn.\n */\n",
+              "excerptTokens": [
+                {
+                  "kind": "Content",
+                  "text": "finishReason?: "
+                },
+                {
+                  "kind": "Content",
+                  "text": "string"
+                },
+                {
+                  "kind": "Content",
+                  "text": ";"
+                }
+              ],
+              "isReadonly": false,
+              "isOptional": true,
+              "releaseTag": "Beta",
+              "name": "finishReason",
+              "propertyTypeTokenRange": {
+                "startIndex": 1,
+                "endIndex": 2
+              }
+            },
+            {
+              "kind": "PropertySignature",
+              "canonicalReference": "@genesislcap/foundation-ai!ChatResponseMeta#parts:member",
+              "docComment": "/**\n * Count of response parts by kind — distinguishes a truly empty turn from a thinking-only one.\n */\n",
+              "excerptTokens": [
+                {
+                  "kind": "Content",
+                  "text": "parts?: "
+                },
+                {
+                  "kind": "Content",
+                  "text": "{\n        functionCall: number;\n        thought: number;\n        text: number;\n    }"
+                },
+                {
+                  "kind": "Content",
+                  "text": ";"
+                }
+              ],
+              "isReadonly": false,
+              "isOptional": true,
+              "releaseTag": "Beta",
+              "name": "parts",
+              "propertyTypeTokenRange": {
+                "startIndex": 1,
+                "endIndex": 2
+              }
+            },
+            {
+              "kind": "PropertySignature",
+              "canonicalReference": "@genesislcap/foundation-ai!ChatResponseMeta#thoughtsTokens:member",
+              "docComment": "/**\n * Reasoning (\"thinking\") tokens billed for this turn, when the provider reports them (Gemini 2.5). A high count alongside ~0 output tokens and a `'STOP'` finish is the \"thought, then stopped without answering\" signature.\n */\n",
+              "excerptTokens": [
+                {
+                  "kind": "Content",
+                  "text": "thoughtsTokens?: "
+                },
+                {
+                  "kind": "Content",
+                  "text": "number"
+                },
+                {
+                  "kind": "Content",
+                  "text": ";"
+                }
+              ],
+              "isReadonly": false,
+              "isOptional": true,
+              "releaseTag": "Beta",
+              "name": "thoughtsTokens",
+              "propertyTypeTokenRange": {
+                "startIndex": 1,
+                "endIndex": 2
+              }
+            }
+          ],
+          "extendsTokenRanges": []
+        },
         {
           "kind": "TypeAlias",
           "canonicalReference": "@genesislcap/foundation-ai!ChatRole:type",
@@ -3747,6 +3929,29 @@
             "endIndex": 2
           }
         },
+        {
+          "kind": "Variable",
+          "canonicalReference": "@genesislcap/foundation-ai!ChatTemperature:var",
+          "docComment": "/**\n * Provider-agnostic, normalized sampling-temperature presets in `0`–`1` space — named handles for the values most callers actually want, so intent reads better than a bare magnitude. Each maps through `scaleTemperature` to the active provider's native range, so the same preset means the same intent whichever provider services the turn:\n *\n * - `ChatTemperature.Deterministic` (`0`) — greedy/argmax sampling. - `ChatTemperature.Focused` (`0.25`) — low but not greedy; precise tool calls and extraction work where you still want a little slack. - `ChatTemperature.Balanced` (`0.5`) — the provider's own default. - `ChatTemperature.Creative` (`0.75`) — hotter than default, short of the ceiling. - `ChatTemperature.Maximum` (`1`) — the hottest the active provider allows.\n *\n * (On a provider whose default equals its max — Anthropic — `Creative` and `Maximum` coincide; see `scaleTemperature`.)\n *\n * @beta\n */\n",
+          "excerptTokens": [
+            {
+              "kind": "Content",
+              "text": "ChatTemperature: "
+            },
+            {
+              "kind": "Content",
+              "text": "{\n    readonly Deterministic: 0;\n    readonly Focused: 0.25;\n    readonly Balanced: 0.5;\n    readonly Creative: 0.75;\n    readonly Maximum: 1;\n}"
+            }
+          ],
+          "fileUrlPath": "src/utils/temperature.ts",
+          "isReadonly": true,
+          "releaseTag": "Beta",
+          "name": "ChatTemperature",
+          "variableTypeTokenRange": {
+            "startIndex": 1,
+            "endIndex": 2
+          }
+        },
         {
           "kind": "TypeAlias",
           "canonicalReference": "@genesislcap/foundation-ai!ChatToolCall:type",
@@ -3814,6 +4019,32 @@
             "endIndex": 3
           }
         },
+        {
+          "kind": "TypeAlias",
+          "canonicalReference": "@genesislcap/foundation-ai!ChatToolChoice:type",
+          "docComment": "/**\n * Controls whether (and how) the model may call a tool on a given turn. Maps to each provider's \"tool choice\" / \"function calling mode\" control:\n *\n * - `'auto'` (the default when omitted) — the model decides whether to call a tool or answer with text. Anthropic leaves `tool_choice` unset; Gemini leaves `functionCallingConfig` unset (`AUTO`). - `'required'` — the model MUST call one of the available tools. Maps to Anthropic `tool_choice: { type: 'any' }` and Gemini `functionCallingConfig.mode: 'ANY'`. - `'none'` — the model MUST NOT call a tool (text answer only). Maps to Anthropic `tool_choice: { type: 'none' }` and Gemini `functionCallingConfig.mode: 'NONE'`. - `{ tool: name }` — the model MUST call exactly the named tool. Maps to Anthropic `tool_choice: { type: 'tool', name }` and Gemini `functionCallingConfig.mode: 'ANY', allowedFunctionNames: [name]`. Use this for surgical forcing at a single-tool juncture (e.g. force a classifier tool in an intake step) while leaving `'auto'` everywhere multi-step work happens.\n *\n * Forcing (`'required'` / `{ tool }`) is a no-op when no tools are advertised.\n *\n * NOTE: forcing is incompatible with Anthropic extended/adaptive thinking — a request must not enable both.\n *\n * @beta\n */\n",
+          "excerptTokens": [
+            {
+              "kind": "Content",
+              "text": "export type ChatToolChoice = "
+            },
+            {
+              "kind": "Content",
+              "text": "'auto' | 'required' | 'none' | {\n    tool: string;\n}"
+            },
+            {
+              "kind": "Content",
+              "text": ";"
+            }
+          ],
+          "fileUrlPath": "src/types/chat.types.ts",
+          "releaseTag": "Beta",
+          "name": "ChatToolChoice",
+          "typeTokenRange": {
+            "startIndex": 1,
+            "endIndex": 2
+          }
+        },
         {
           "kind": "Interface",
           "canonicalReference": "@genesislcap/foundation-ai!ChatToolDefinition:interface",