npm - @carlonicora/nestjs-neo4jsonapi - Version diff - 1.64.0 → 1.65.0 - Mend

@carlonicora/nestjs-neo4jsonapi 1.64.0 → 1.65.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (233)

package/dist/core/llm/services/llm.service.js CHANGED Viewed

@@ -17,6 +17,7 @@ const config_1 = require("@nestjs/config");
 const agentmessage_type_1 = require("../../../common/enums/agentmessage.type");
 const model_service_1 = require("../../llm/services/model.service");
 const schema_utils_1 = require("../../llm/utils/schema.utils");
+const llm_call_dumper_service_1 = require("./llm-call-dumper.service");
 /**
  * Type guard to validate raw response structure
  */
@@ -24,9 +25,10 @@ function isValidRaw(raw) {
     return typeof raw === "object" && raw !== null;
 }
 let LLMService = class LLMService {
-    constructor(modelService, config) {
+    constructor(modelService, config, dumper) {
         this.modelService = modelService;
         this.config = config;
+        this.dumper = dumper;
         this._sessionTokens = {
             input: 0,
             output: 0,
@@ -268,244 +270,325 @@ let LLMService = class LLMService {
      * ```
      */
     async call(params) {
+        const aiConfig = this.config.get("ai").ai;
+        const session = this.dumper.startSession({
+            metadata: params.metadata,
+            model: aiConfig.model,
+            provider: aiConfig.provider,
+            temperature: params.temperature,
+        });
+        let totalInput = 0;
+        let totalOutput = 0;
+        const parseFallbacks = [];
+        const warnings = [];
         try {
-            // Optional: Validate input parameters against schema
-            if (params.inputSchema && params.validateInput) {
-                try {
-                    params.inputParams = params.inputSchema.parse(params.inputParams);
-                }
-                catch (validationError) {
-                    console.error("[LLMService] Input validation failed:", validationError);
-                    throw new Error(`Invalid input parameters: ${validationError instanceof Error ? validationError.message : "Unknown validation error"}`);
-                }
-            }
-            // Create messages with modern MessagesPlaceholder pattern (with schema-guided instructions)
-            const { template, historyMessages } = this._createMessages({
-                systemPrompts: params.systemPrompts,
-                instructions: params.instructions,
-                inputParams: params.inputParams,
-                inputSchema: params.inputSchema,
-                history: params.history,
-                maxHistoryMessages: params.maxHistoryMessages,
+            const result = await this._invokeOriginal(params, session, (i, o) => {
+                totalInput += i;
+                totalOutput += o;
+            }, (kind) => parseFallbacks.push(kind), (w) => warnings.push(w));
+            session.close({
+                finalStatus: "success",
+                totalTokens: { input: totalInput, output: totalOutput },
+                warnings,
+                parseFallbacks,
             });
-            const prompt = prompts_1.ChatPromptTemplate.fromMessages(template);
-            // Get base model
-            const baseModel = this.modelService.getLLM({
-                temperature: params.temperature,
-            });
-            // Build config options for the invocation
-            const configOptions = {};
-            if (params.maxTokens)
-                configOptions.maxTokens = params.maxTokens;
-            if (params.stopSequences)
-                configOptions.stop = params.stopSequences;
-            if (params.metadata)
-                configOptions.metadata = params.metadata;
-            if (params.timeout)
-                configOptions.timeout = params.timeout;
-            // Track token usage across tool iterations
-            let totalInputTokens = 0;
-            let totalOutputTokens = 0;
-            // Build initial messages for the conversation
-            const conversationMessages = await prompt.formatMessages({
-                ...params.inputParams,
-                chat_history: historyMessages,
+            return result;
+        }
+        catch (error) {
+            const message = error instanceof Error ? error.message : String(error);
+            const stack = error instanceof Error ? (error.stack ?? "").split("\n").slice(0, 10).join("\n") : undefined;
+            session.close({
+                finalStatus: "error",
+                errorMessage: message,
+                errorStack: stack,
+                totalTokens: { input: totalInput, output: totalOutput },
+                warnings,
+                parseFallbacks,
             });
-            // If tools are provided, handle tool calling loop
-            if (params.tools && params.tools.length > 0) {
-                const maxIterations = params.maxToolIterations ?? 5;
-                // Build tool map for execution
-                const toolMap = new Map();
-                for (const tool of params.tools) {
-                    toolMap.set(tool.name, tool);
-                }
-                // Bind tools to model
-                const modelWithTools = baseModel.bindTools(params.tools);
-                // Tool calling loop
-                for (let iteration = 0; iteration < maxIterations; iteration++) {
-                    // Call model with tools
-                    const toolResponse = Object.keys(configOptions).length > 0
-                        ? await modelWithTools.invoke(conversationMessages, configOptions)
-                        : await modelWithTools.invoke(conversationMessages);
-                    // Track token usage
-                    const responseUsage = toolResponse.usage_metadata;
-                    if (responseUsage) {
-                        totalInputTokens += responseUsage.input_tokens ?? 0;
-                        totalOutputTokens += responseUsage.output_tokens ?? 0;
-                    }
-                    // Check for tool calls
-                    const toolCalls = toolResponse.tool_calls ?? [];
-                    if (toolCalls.length === 0) {
-                        // No more tool calls - break to get final structured response
-                        break;
-                    }
-                    // Add AI message with tool calls to conversation
-                    conversationMessages.push(toolResponse);
-                    // Execute each tool call
-                    for (const toolCall of toolCalls) {
-                        const tool = toolMap.get(toolCall.name);
-                        if (!tool) {
-                            console.warn(`[LLMService] Tool not found: ${toolCall.name}`);
-                            conversationMessages.push(new messages_1.ToolMessage({
-                                content: `Tool "${toolCall.name}" not found`,
-                                tool_call_id: toolCall.id ?? "",
-                            }));
-                            continue;
-                        }
-                        try {
-                            const result = await tool.invoke(toolCall.args);
-                            const resultStr = typeof result === "string" ? result : JSON.stringify(result);
-                            conversationMessages.push(new messages_1.ToolMessage({
-                                content: resultStr,
-                                tool_call_id: toolCall.id ?? "",
-                            }));
-                        }
-                        catch (error) {
-                            console.error(`[LLMService] Tool error: ${toolCall.name}`, error);
-                            conversationMessages.push(new messages_1.ToolMessage({
-                                content: `Tool error: ${error instanceof Error ? error.message : "Unknown error"}`,
-                                tool_call_id: toolCall.id ?? "",
-                            }));
-                        }
-                    }
-                }
-            }
-            // Nudge the model out of tool-use mode before asking for the final structured
-            // answer. Without this, some models (notably gpt-oss) emit another tool_calls
-            // response instead of producing the structured output, and parsing fails with
-            // "No content" / finish_reason=tool_calls. The nudge is only appended when the
-            // tool-calling loop ran at all.
-            if (params.tools && params.tools.length > 0 && conversationMessages.length > 0) {
-                conversationMessages.push(new messages_1.HumanMessage("You have gathered enough information from the tool calls above to answer the user's question. Produce your final answer now as the structured output the system expects. Do not request any further tool calls."));
-            }
-            // Get final structured response (unified path for both tool and non-tool flows)
-            // For Requesty + Gemini: sanitize schema to remove $schema, $defs, etc. that Gemini rejects
-            const aiConfig = this.config.get("ai").ai;
-            // Check if model is Gemini (handles both "gemini-..." and "google/gemini-..." formats)
-            const modelLower = aiConfig.model.toLowerCase();
-            const isGeminiModel = modelLower.startsWith("gemini") || modelLower.includes("/gemini");
-            const needsGeminiSanitization = aiConfig.provider === "requesty" && isGeminiModel;
-            let structuredLlm;
-            if (needsGeminiSanitization) {
-                // Convert Zod to JSON Schema and remove Gemini-incompatible properties
-                const jsonSchema = (0, schema_utils_1.convertZodToJsonSchema)(params.outputSchema);
-                const sanitizedSchema = (0, schema_utils_1.sanitizeSchemaForGemini)(jsonSchema);
-                structuredLlm = baseModel.withStructuredOutput(sanitizedSchema, {
-                    includeRaw: true,
-                });
+            console.error("[LLMService] Error calling LLM:", error);
+            throw new Error(`LLM service error: ${message}`);
+        }
+    }
+    async _invokeOriginal(params, session, addTokens, addParseFallback, addWarning) {
+        // Optional: Validate input parameters against schema
+        if (params.inputSchema && params.validateInput) {
+            try {
+                params.inputParams = params.inputSchema.parse(params.inputParams);
             }
-            else {
-                // All other providers: use Zod schema directly
-                structuredLlm = baseModel.withStructuredOutput(params.outputSchema, {
-                    includeRaw: true,
-                });
+            catch (validationError) {
+                console.error("[LLMService] Input validation failed:", validationError);
+                throw new Error(`Invalid input parameters: ${validationError instanceof Error ? validationError.message : "Unknown validation error"}`);
             }
-            const response = (await structuredLlm.invoke(conversationMessages, Object.keys(configOptions).length > 0 ? configOptions : undefined));
-            // Extract token usage with type guard (includes tool iteration tokens)
-            const raw = isValidRaw(response.raw) ? response.raw : undefined;
-            const input = totalInputTokens + (raw?.usage_metadata?.input_tokens ?? 0);
-            const output = totalOutputTokens + (raw?.usage_metadata?.output_tokens ?? 0);
-            // Update session tracking
-            this._sessionTokens.input += input;
-            this._sessionTokens.output += output;
-            this._sessionTokens.total += input + output;
-            this._sessionTokens.callCount += 1;
-            // Warn if high token usage
-            const totalTokens = input + output;
-            if (totalTokens > 8000) {
-                console.warn(`[LLMService] High token usage detected: ${totalTokens} tokens in this call`);
+        }
+        // Create messages with modern MessagesPlaceholder pattern (with schema-guided instructions)
+        const { template, historyMessages } = this._createMessages({
+            systemPrompts: params.systemPrompts,
+            instructions: params.instructions,
+            inputParams: params.inputParams,
+            inputSchema: params.inputSchema,
+            history: params.history,
+            maxHistoryMessages: params.maxHistoryMessages,
+        });
+        const prompt = prompts_1.ChatPromptTemplate.fromMessages(template);
+        // Get base model
+        const baseModel = this.modelService.getLLM({
+            temperature: params.temperature,
+        });
+        // Build config options for the invocation
+        const configOptions = {};
+        if (params.maxTokens)
+            configOptions.maxTokens = params.maxTokens;
+        if (params.stopSequences)
+            configOptions.stop = params.stopSequences;
+        if (params.metadata)
+            configOptions.metadata = params.metadata;
+        if (params.timeout)
+            configOptions.timeout = params.timeout;
+        // Track token usage across tool iterations
+        let totalInputTokens = 0;
+        let totalOutputTokens = 0;
+        // Build initial messages for the conversation
+        const conversationMessages = await prompt.formatMessages({
+            ...params.inputParams,
+            chat_history: historyMessages,
+        });
+        session.recordInputs({
+            systemPrompts: params.systemPrompts,
+            instructions: params.instructions ?? this._generateSchemaGuidedInstructions(params.inputParams, params.inputSchema),
+            inputParams: params.inputParams,
+            history: (params.history ?? []).map((h) => ({ role: String(h.role), content: h.content })),
+            tools: (params.tools ?? []).map((t) => ({
+                name: t.name,
+                description: t.description,
+                schema: t.schema,
+            })),
+            outputSchemaName: params.outputSchema?.constructor?.name ?? "outputSchema",
+        });
+        // If tools are provided, handle tool calling loop
+        if (params.tools && params.tools.length > 0) {
+            const maxIterations = params.maxToolIterations ?? 5;
+            // Build tool map for execution
+            const toolMap = new Map();
+            for (const tool of params.tools) {
+                toolMap.set(tool.name, tool);
             }
-            // Enhanced error handling with detailed diagnostics
-            if (!response.parsed) {
-                const rawContent = raw?.content || "No content";
-                const finishReason = raw?.response_metadata?.finish_reason;
-                console.error("[LLMService] Parsing failed:", {
-                    rawContentPreview: rawContent.substring(0, 500),
-                    finishReason,
-                    schemaName: params.outputSchema.constructor.name,
+            // Bind tools to model
+            const modelWithTools = baseModel.bindTools(params.tools);
+            // Tool calling loop
+            for (let iteration = 0; iteration < maxIterations; iteration++) {
+                session.startIteration("tool-loop", conversationMessages);
+                // Call model with tools
+                const toolResponse = Object.keys(configOptions).length > 0
+                    ? await modelWithTools.invoke(conversationMessages, configOptions)
+                    : await modelWithTools.invoke(conversationMessages);
+                session.recordResponse({
+                    content: typeof toolResponse.content === "string" ? toolResponse.content : "",
+                    toolCalls: (toolResponse.tool_calls ?? []).map((c) => ({
+                        id: c.id ?? "",
+                        name: c.name,
+                        args: c.args,
+                    })),
+                    tokenUsage: {
+                        input: toolResponse.usage_metadata?.input_tokens ?? 0,
+                        output: toolResponse.usage_metadata?.output_tokens ?? 0,
+                    },
+                    finishReason: toolResponse.response_metadata?.finish_reason,
                 });
-                // Attempt fallback parsing from tool_calls first (Azure/OpenAI function calling puts structured data here)
-                const rawAnyFallback = raw;
-                const toolCallArgs = rawAnyFallback?.tool_calls?.[0]?.args;
-                if (toolCallArgs && typeof toolCallArgs === "object") {
+                // Track token usage
+                const responseUsage = toolResponse.usage_metadata;
+                if (responseUsage) {
+                    totalInputTokens += responseUsage.input_tokens ?? 0;
+                    totalOutputTokens += responseUsage.output_tokens ?? 0;
+                }
+                // Check for tool calls
+                const toolCalls = toolResponse.tool_calls ?? [];
+                if (toolCalls.length === 0) {
+                    // No more tool calls - break to get final structured response
+                    break;
+                }
+                // Add AI message with tool calls to conversation
+                conversationMessages.push(toolResponse);
+                // Execute each tool call
+                for (const toolCall of toolCalls) {
+                    const tool = toolMap.get(toolCall.name);
+                    if (!tool) {
+                        console.warn(`[LLMService] Tool not found: ${toolCall.name}`);
+                        conversationMessages.push(new messages_1.ToolMessage({
+                            content: `Tool "${toolCall.name}" not found`,
+                            tool_call_id: toolCall.id ?? "",
+                        }));
+                        session.recordToolResult(toolCall.id ?? "", toolCall.name, `Tool "${toolCall.name}" not found`);
+                        continue;
+                    }
                     try {
-                        console.warn("[LLMService] Attempting fallback parsing from tool_calls args");
-                        const validated = params.outputSchema.parse(toolCallArgs);
-                        console.warn("[LLMService] Fallback tool_calls parsing succeeded");
-                        return {
-                            ...validated,
-                            tokenUsage: { input, output },
-                        };
+                        const result = await tool.invoke(toolCall.args);
+                        const resultStr = typeof result === "string" ? result : JSON.stringify(result);
+                        conversationMessages.push(new messages_1.ToolMessage({
+                            content: resultStr,
+                            tool_call_id: toolCall.id ?? "",
+                        }));
+                        session.recordToolResult(toolCall.id ?? "", toolCall.name, resultStr);
                     }
-                    catch (_toolCallFallbackError) {
-                        // Lenient fallback: filter out malformed array entries from tool_calls args
-                        // This handles cases where the model returns mostly valid data with a few corrupt entries
-                        try {
-                            console.warn("[LLMService] Attempting lenient tool_calls parsing (filtering invalid array entries)");
-                            const cleanedArgs = { ...toolCallArgs };
-                            const shape = params.outputSchema?.shape;
-                            if (shape) {
-                                for (const [key, fieldSchema] of Object.entries(shape)) {
-                                    if (Array.isArray(cleanedArgs[key])) {
-                                        // In Zod v4, ZodArray exposes .element as the element schema with .safeParse()
-                                        // Unwrap optional/default/nullable wrappers first if present
-                                        let schema = fieldSchema;
-                                        while (schema?.unwrap && !schema?.element) {
-                                            schema = schema.unwrap();
-                                        }
-                                        const elementSchema = schema?.element;
-                                        if (elementSchema && typeof elementSchema.safeParse === "function") {
-                                            const original = cleanedArgs[key];
-                                            cleanedArgs[key] = original.filter((entry) => elementSchema.safeParse(entry).success);
-                                            if (cleanedArgs[key].length < original.length) {
-                                                console.warn(`[LLMService] Filtered ${original.length - cleanedArgs[key].length}/${original.length} invalid entries from "${key}"`);
-                                            }
-                                        }
-                                    }
-                                }
-                            }
-                            const validated = params.outputSchema.parse(cleanedArgs);
-                            console.warn("[LLMService] Lenient tool_calls parsing succeeded");
-                            return {
-                                ...validated,
-                                tokenUsage: { input, output },
-                            };
-                        }
-                        catch {
-                            // Fall through to raw content parsing
-                        }
+                    catch (error) {
+                        console.error(`[LLMService] Tool error: ${toolCall.name}`, error);
+                        conversationMessages.push(new messages_1.ToolMessage({
+                            content: `Tool error: ${error instanceof Error ? error.message : "Unknown error"}`,
+                            tool_call_id: toolCall.id ?? "",
+                        }));
+                        session.recordToolResult(toolCall.id ?? "", toolCall.name, `Tool error: ${error instanceof Error ? error.message : "Unknown error"}`);
                     }
                 }
-                // Attempt fallback parsing from raw content
+            }
+        }
+        // Nudge the model out of tool-use mode before asking for the final structured
+        // answer. Without this, some models (notably gpt-oss) emit another tool_calls
+        // response instead of producing the structured output, and parsing fails with
+        // "No content" / finish_reason=tool_calls. The nudge is only appended when the
+        // tool-calling loop ran at all.
+        if (params.tools && params.tools.length > 0 && conversationMessages.length > 0) {
+            conversationMessages.push(new messages_1.HumanMessage("You have gathered enough information from the tool calls above to answer the user's question. Produce your final answer now as the structured output the system expects. Do not request any further tool calls."));
+        }
+        // Get final structured response (unified path for both tool and non-tool flows)
+        // For Requesty + Gemini: sanitize schema to remove $schema, $defs, etc. that Gemini rejects
+        const aiConfig = this.config.get("ai").ai;
+        // Check if model is Gemini (handles both "gemini-..." and "google/gemini-..." formats)
+        const modelLower = aiConfig.model.toLowerCase();
+        const isGeminiModel = modelLower.startsWith("gemini") || modelLower.includes("/gemini");
+        const needsGeminiSanitization = aiConfig.provider === "requesty" && isGeminiModel;
+        let structuredLlm;
+        if (needsGeminiSanitization) {
+            // Convert Zod to JSON Schema and remove Gemini-incompatible properties
+            const jsonSchema = (0, schema_utils_1.convertZodToJsonSchema)(params.outputSchema);
+            const sanitizedSchema = (0, schema_utils_1.sanitizeSchemaForGemini)(jsonSchema);
+            structuredLlm = baseModel.withStructuredOutput(sanitizedSchema, {
+                includeRaw: true,
+            });
+        }
+        else {
+            // All other providers: use Zod schema directly
+            structuredLlm = baseModel.withStructuredOutput(params.outputSchema, {
+                includeRaw: true,
+            });
+        }
+        session.startIteration("final-structured", conversationMessages);
+        const response = (await structuredLlm.invoke(conversationMessages, Object.keys(configOptions).length > 0 ? configOptions : undefined));
+        // Extract token usage with type guard (includes tool iteration tokens)
+        const raw = isValidRaw(response.raw) ? response.raw : undefined;
+        session.recordResponse({
+            content: typeof raw?.content === "string" ? raw.content : "",
+            tokenUsage: {
+                input: raw?.usage_metadata?.input_tokens ?? 0,
+                output: raw?.usage_metadata?.output_tokens ?? 0,
+            },
+            finishReason: raw?.response_metadata?.finish_reason,
+        });
+        const input = totalInputTokens + (raw?.usage_metadata?.input_tokens ?? 0);
+        const output = totalOutputTokens + (raw?.usage_metadata?.output_tokens ?? 0);
+        // Update session tracking
+        this._sessionTokens.input += input;
+        this._sessionTokens.output += output;
+        this._sessionTokens.total += input + output;
+        this._sessionTokens.callCount += 1;
+        // Warn if high token usage
+        const totalTokens = input + output;
+        if (totalTokens > 8000) {
+            const msg = `High token usage detected: ${totalTokens} tokens in this call`;
+            console.warn(`[LLMService] ${msg}`);
+            addWarning(msg);
+        }
+        // Enhanced error handling with detailed diagnostics
+        if (!response.parsed) {
+            const rawContent = raw?.content || "No content";
+            const finishReason = raw?.response_metadata?.finish_reason;
+            console.error("[LLMService] Parsing failed:", {
+                rawContentPreview: rawContent.substring(0, 500),
+                finishReason,
+                schemaName: params.outputSchema.constructor.name,
+            });
+            // Attempt fallback parsing from tool_calls first (Azure/OpenAI function calling puts structured data here)
+            const rawAnyFallback = raw;
+            const toolCallArgs = rawAnyFallback?.tool_calls?.[0]?.args;
+            if (toolCallArgs && typeof toolCallArgs === "object") {
+                addParseFallback("tool_calls");
                 try {
-                    console.warn("[LLMService] Attempting fallback JSON parsing");
-                    const manualParse = JSON.parse(rawContent);
-                    const validated = params.outputSchema.parse(manualParse);
-                    console.warn("[LLMService] Fallback parsing succeeded");
+                    console.warn("[LLMService] Attempting fallback parsing from tool_calls args");
+                    const validated = params.outputSchema.parse(toolCallArgs);
+                    console.warn("[LLMService] Fallback tool_calls parsing succeeded");
+                    addTokens(input, output);
                     return {
                         ...validated,
                         tokenUsage: { input, output },
                     };
                 }
-                catch (fallbackError) {
-                    throw new Error(`LLM failed to return structured output. ` +
-                        `Finish reason: ${finishReason}. ` +
-                        `Raw content preview: ${rawContent.substring(0, 200)}...` +
-                        `Fallback parsing error: ${fallbackError instanceof Error ? fallbackError.message : String(fallbackError)}`);
+                catch (_toolCallFallbackError) {
+                    // Lenient fallback: filter out malformed array entries from tool_calls args
+                    // This handles cases where the model returns mostly valid data with a few corrupt entries
+                    addParseFallback("lenient");
+                    try {
+                        console.warn("[LLMService] Attempting lenient tool_calls parsing (filtering invalid array entries)");
+                        const cleanedArgs = { ...toolCallArgs };
+                        const shape = params.outputSchema?.shape;
+                        if (shape) {
+                            for (const [key, fieldSchema] of Object.entries(shape)) {
+                                if (Array.isArray(cleanedArgs[key])) {
+                                    // In Zod v4, ZodArray exposes .element as the element schema with .safeParse()
+                                    // Unwrap optional/default/nullable wrappers first if present
+                                    let schema = fieldSchema;
+                                    while (schema?.unwrap && !schema?.element) {
+                                        schema = schema.unwrap();
+                                    }
+                                    const elementSchema = schema?.element;
+                                    if (elementSchema && typeof elementSchema.safeParse === "function") {
+                                        const original = cleanedArgs[key];
+                                        cleanedArgs[key] = original.filter((entry) => elementSchema.safeParse(entry).success);
+                                        if (cleanedArgs[key].length < original.length) {
+                                            console.warn(`[LLMService] Filtered ${original.length - cleanedArgs[key].length}/${original.length} invalid entries from "${key}"`);
+                                        }
+                                    }
+                                }
+                            }
+                        }
+                        const validated = params.outputSchema.parse(cleanedArgs);
+                        console.warn("[LLMService] Lenient tool_calls parsing succeeded");
+                        addTokens(input, output);
+                        return {
+                            ...validated,
+                            tokenUsage: { input, output },
+                        };
+                    }
+                    catch {
+                        // Fall through to raw content parsing
+                    }
                 }
             }
-            return {
-                ...response.parsed,
-                tokenUsage: {
-                    input,
-                    output,
-                },
-            };
-        }
-        catch (error) {
-            console.error("[LLMService] Error calling LLM:", error);
-            throw new Error(`LLM service error: ${error instanceof Error ? error.message : "Unknown error"}`);
+            // Attempt fallback parsing from raw content
+            addParseFallback("raw");
+            try {
+                console.warn("[LLMService] Attempting fallback JSON parsing");
+                const manualParse = JSON.parse(rawContent);
+                const validated = params.outputSchema.parse(manualParse);
+                console.warn("[LLMService] Fallback parsing succeeded");
+                addTokens(input, output);
+                return {
+                    ...validated,
+                    tokenUsage: { input, output },
+                };
+            }
+            catch (fallbackError) {
+                throw new Error(`LLM failed to return structured output. ` +
+                    `Finish reason: ${finishReason}. ` +
+                    `Raw content preview: ${rawContent.substring(0, 200)}...` +
+                    `Fallback parsing error: ${fallbackError instanceof Error ? fallbackError.message : String(fallbackError)}`);
+            }
         }
+        addTokens(input, output);
+        return {
+            ...response.parsed,
+            tokenUsage: {
+                input,
+                output,
+            },
+        };
     }
     /**
      * Get session-level token usage statistics
@@ -533,6 +616,7 @@ exports.LLMService = LLMService;
 exports.LLMService = LLMService = __decorate([
     (0, common_1.Injectable)(),
     __metadata("design:paramtypes", [model_service_1.ModelService,
-        config_1.ConfigService])
+        config_1.ConfigService,
+        llm_call_dumper_service_1.LLMCallDumper])
 ], LLMService);
 //# sourceMappingURL=llm.service.js.map