npm - node-red-contrib-knx-ultimate - Versions diffs - 4.2.9 → 4.2.11 - Mend

node-red-contrib-knx-ultimate 4.2.9 → 4.2.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

package/CHANGELOG.md +10 -1
package/nodes/knxUltimateAI.html +3 -3
package/nodes/knxUltimateAI.js +245 -56
package/nodes/plugins/knxUltimateAI-vue/assets/app.css +1 -1
package/nodes/plugins/knxUltimateAI-vue/assets/app.js +3 -3
package/package.json +2 -2

package/CHANGELOG.md CHANGED

@@ -6,7 +6,16 @@
 # CHANGELOG
-**Version 4.2.9** - April 2026<br/>
+**Version 4.2.11** - April 2026<br/>
+- UI: **KNX AI Web** improved assistant workflow with chat-style layout (prompt input under messages), clearer prompt focus, and streamlined Ask page text.<br/>
+- UI: **KNX AI Web** added clearer loading feedback for AI operations (area regeneration/deletion and planner generation), including a centered blocking wait overlay during long-running area generation.<br/>
+- NEW: **KNX AI Web** added bulk action **Delete AI Areas** and localized labels for **Regenerate AI Areas** / **Delete AI Areas** across supported UI languages.<br/>
+- FIX: **KNX AI backend** hardened JSON extraction/parsing for LLM outputs and improved timeout/token-limit diagnostics in error messages.<br/>
+- CHANGE: **KNX AI backend** raised default/forced `llmMaxTokens` handling for structured responses and aligned editor defaults for high-token completions.<br/>
+- Docs/wiki: improved **KNX AI Dashboard** pages for end users, added localized guidance updates, and made support CTA links visible in docs navigation.<br/>
+**Version 4.2.10** - April 2026<br/>
 - UI: **KNX AI Web** sidebar menu style aligned to Homebridge (font size/weight, spacing, icon/text alignment and sidebar widths) for closer visual consistency.<br/>
 - UI: **KNX AI Web** removed sidebar menu subtitles to keep navigation labels clean and compact.<br/>

package/nodes/knxUltimateAI.html CHANGED

@@ -36,8 +36,8 @@
             llmModel: { value: "gpt-5.4" },
             llmSystemPrompt: { value: "" },
             llmTemperature: { value: 0.2, required: false, validate: RED.validators.number() },
-            llmMaxTokens: { value: 600, required: false, validate: RED.validators.number() },
-            llmTimeoutMs: { value: 30000, required: false, validate: RED.validators.number() },
+            llmMaxTokens: { value: 50000, required: false, validate: RED.validators.number() },
+            llmTimeoutMs: { value: 120000, required: false, validate: RED.validators.number() },
             llmMaxEventsInPrompt: { value: 120, required: false, validate: RED.validators.number() },
             llmIncludeRaw: { value: false },
             llmIncludeFlowContext: { value: true },
@@ -391,4 +391,4 @@
             </div>
         </div>
     </div>
-</script>
+</script>

package/nodes/knxUltimateAI.js CHANGED

@@ -254,31 +254,124 @@ const buildLlmSummarySnapshot = (summary) => {
 const extractJsonFragmentFromText = (value) => {
   const text = String(value || '').trim()
   if (!text) throw new Error('Empty AI response')
-  const fenced = text.match(/```(?:json)?\s*([\s\S]*?)```/i)
-  const candidate = fenced && fenced[1] ? String(fenced[1]).trim() : text
+  const normalizeCandidate = (input) => String(input || '')
+    .replace(/^\uFEFF/, '')
+    .replace(/^\s*json\s*\n/i, '')
+    .trim()
   const tryParse = (input) => {
-    if (!input) return null
+    const source = normalizeCandidate(input)
+    if (!source) return null
+    try {
+      return JSON.parse(source)
+    } catch (error) {}
+    // Fallback: tolerate comments and trailing commas that some models emit.
+    const relaxed = source
+      .replace(/\/\*[\s\S]*?\*\//g, '')
+      .replace(/^\s*\/\/.*$/gm, '')
+      .replace(/,\s*([}\]])/g, '$1')
+      .trim()
+    if (!relaxed || relaxed === source) return null
     try {
-      return JSON.parse(input)
+      return JSON.parse(relaxed)
     } catch (error) {
       return null
     }
   }
-  const direct = tryParse(candidate)
-  if (direct !== null) return direct
-  const objectStart = candidate.indexOf('{')
-  const objectEnd = candidate.lastIndexOf('}')
-  if (objectStart !== -1 && objectEnd !== -1 && objectEnd > objectStart) {
-    const parsedObject = tryParse(candidate.slice(objectStart, objectEnd + 1))
-    if (parsedObject !== null) return parsedObject
+  const extractBalancedJsonSlices = (input, maxSlices = 24) => {
+    const source = String(input || '')
+    const out = []
+    for (let i = 0; i < source.length; i += 1) {
+      const ch = source[i]
+      if (ch !== '{' && ch !== '[') continue
+      const stack = [ch === '{' ? '}' : ']']
+      let inString = false
+      let escaped = false
+      for (let j = i + 1; j < source.length; j += 1) {
+        const current = source[j]
+        if (inString) {
+          if (escaped) {
+            escaped = false
+            continue
+          }
+          if (current === '\\') {
+            escaped = true
+            continue
+          }
+          if (current === '"') inString = false
+          continue
+        }
+        if (current === '"') {
+          inString = true
+          continue
+        }
+        if (current === '{') {
+          stack.push('}')
+          continue
+        }
+        if (current === '[') {
+          stack.push(']')
+          continue
+        }
+        if ((current === '}' || current === ']') && stack.length) {
+          if (current !== stack[stack.length - 1]) break
+          stack.pop()
+          if (!stack.length) {
+            const slice = normalizeCandidate(source.slice(i, j + 1))
+            if (slice) out.push(slice)
+            i = j
+            break
+          }
+        }
+      }
+      if (out.length >= maxSlices) break
+    }
+    return out
   }
-  const arrayStart = candidate.indexOf('[')
-  const arrayEnd = candidate.lastIndexOf(']')
-  if (arrayStart !== -1 && arrayEnd !== -1 && arrayEnd > arrayStart) {
-    const parsedArray = tryParse(candidate.slice(arrayStart, arrayEnd + 1))
-    if (parsedArray !== null) return parsedArray
+  const candidates = []
+  const seen = new Set()
+  const pushCandidate = (input) => {
+    const normalized = normalizeCandidate(input)
+    if (!normalized || seen.has(normalized)) return
+    seen.add(normalized)
+    candidates.push(normalized)
+  }
+  pushCandidate(text)
+  const fencedRe = /```(?:[a-zA-Z0-9_-]+)?\s*([\s\S]*?)```/g
+  let fenceMatch
+  while ((fenceMatch = fencedRe.exec(text)) !== null) {
+    pushCandidate(fenceMatch[1])
+  }
+  for (const candidate of candidates) {
+    const direct = tryParse(candidate)
+    if (direct !== null) return direct
+    const objectStart = candidate.indexOf('{')
+    const objectEnd = candidate.lastIndexOf('}')
+    if (objectStart !== -1 && objectEnd !== -1 && objectEnd > objectStart) {
+      const parsedObject = tryParse(candidate.slice(objectStart, objectEnd + 1))
+      if (parsedObject !== null) return parsedObject
+    }
+    const arrayStart = candidate.indexOf('[')
+    const arrayEnd = candidate.lastIndexOf(']')
+    if (arrayStart !== -1 && arrayEnd !== -1 && arrayEnd > arrayStart) {
+      const parsedArray = tryParse(candidate.slice(arrayStart, arrayEnd + 1))
+      if (parsedArray !== null) return parsedArray
+    }
+    const balancedSlices = extractBalancedJsonSlices(candidate)
+    for (const slice of balancedSlices) {
+      const parsedSlice = tryParse(slice)
+      if (parsedSlice !== null) return parsedSlice
+    }
   }
-  throw new Error('The LLM response did not contain valid JSON')
+  const preview = text.slice(0, 180).replace(/\s+/g, ' ').trim()
+  throw new Error(`The LLM response did not contain valid JSON${preview ? ` (preview: ${preview})` : ''}`)
 }
 const normalizeValueForCompare = (value) => {
@@ -1624,6 +1717,10 @@ const buildOpenAICompatFallbackText = (json) => {
   return `No assistant text was returned by the provider${usageText}.`
 }
+const isOpenAICompatLengthFallbackText = (value) => {
+  return /^The model stopped because of token limit\b/i.test(String(value || '').trim())
+}
 const DPT_OPTIONS_CACHE = new Map()
 const getDptValueOptions = (dptId) => {
@@ -2042,15 +2139,25 @@ const buildRelevantDocsContext = ({ moduleRootDir, question, preferredLangDir, m
 }
 const postJson = async ({ url, headers, body, timeoutMs }) => {
+  const resolvedTimeoutMs = Math.max(1000, Number(timeoutMs) || 30000)
   const controller = new AbortController()
-  const timer = setTimeout(() => controller.abort(), timeoutMs || 30000)
+  const timer = setTimeout(() => controller.abort(), resolvedTimeoutMs)
   try {
-    const res = await fetch(url, {
-      method: 'POST',
-      headers: Object.assign({ 'content-type': 'application/json' }, headers || {}),
-      body: JSON.stringify(body || {}),
-      signal: controller.signal
-    })
+    let res
+    try {
+      res = await fetch(url, {
+        method: 'POST',
+        headers: Object.assign({ 'content-type': 'application/json' }, headers || {}),
+        body: JSON.stringify(body || {}),
+        signal: controller.signal
+      })
+    } catch (error) {
+      const isAbort = (error && error.name === 'AbortError') || /\babort(ed)?\b/i.test(String(error && error.message ? error.message : ''))
+      if (isAbort) {
+        throw new Error(`LLM request timeout after ${Math.round(resolvedTimeoutMs / 1000)}s. Increase "Timeout ms" in the KNX AI node settings or reduce prompt context.`)
+      }
+      throw error
+    }
     const text = await res.text()
     let json
     try {
@@ -2072,8 +2179,9 @@ const postJson = async ({ url, headers, body, timeoutMs }) => {
 }
 const getJson = async ({ url, headers, timeoutMs }) => {
+  const resolvedTimeoutMs = Math.max(1000, Number(timeoutMs) || 20000)
   const controller = new AbortController()
-  const timer = setTimeout(() => controller.abort(), timeoutMs || 20000)
+  const timer = setTimeout(() => controller.abort(), resolvedTimeoutMs)
   try {
     const res = await fetch(url, { method: 'GET', headers: headers || {}, signal: controller.signal })
     const text = await res.text()
@@ -2612,6 +2720,25 @@ module.exports = function (RED) {
       }
     })
+    RED.httpAdmin.post('/knxUltimateAI/sidebar/areas/delete-llm', RED.auth.needsPermission('knxUltimate-config.write'), async (req, res) => {
+      try {
+        const nodeId = req.body?.nodeId ? String(req.body.nodeId) : ''
+        if (!nodeId) {
+          res.status(400).json({ error: 'Missing nodeId' })
+          return
+        }
+        const n = aiRuntimeNodes.get(nodeId) || RED.nodes.getNode(nodeId)
+        if (!n || n.type !== 'knxUltimateAI' || typeof n.deleteAllLlmAreas !== 'function') {
+          res.status(404).json({ error: 'KNX AI node not found' })
+          return
+        }
+        const ret = await n.deleteAllLlmAreas()
+        res.json(ret)
+      } catch (error) {
+        res.status(error.status || 500).json({ error: error.message || String(error) })
+      }
+    })
     RED.httpAdmin.post('/knxUltimateAI/sidebar/areas/catalog', RED.auth.needsPermission('knxUltimate-config.read'), async (req, res) => {
       try {
         const nodeId = req.body?.nodeId ? String(req.body.nodeId) : ''
@@ -3203,8 +3330,8 @@ module.exports = function (RED) {
     node.llmModel = config.llmModel || 'gpt-4o-mini'
     node.llmSystemPrompt = config.llmSystemPrompt || 'You are a KNX building automation assistant. Analyze KNX bus traffic and provide actionable insights.'
     node.llmTemperature = (config.llmTemperature === undefined || config.llmTemperature === '') ? 0.2 : Number(config.llmTemperature)
-    node.llmMaxTokens = (config.llmMaxTokens === undefined || config.llmMaxTokens === '') ? 600 : Number(config.llmMaxTokens)
-    node.llmTimeoutMs = (config.llmTimeoutMs === undefined || config.llmTimeoutMs === '') ? 30000 : Number(config.llmTimeoutMs)
+    node.llmMaxTokens = (config.llmMaxTokens === undefined || config.llmMaxTokens === '') ? 50000 : Number(config.llmMaxTokens)
+    node.llmTimeoutMs = (config.llmTimeoutMs === undefined || config.llmTimeoutMs === '') ? 120000 : Number(config.llmTimeoutMs)
     node.llmMaxEventsInPrompt = (config.llmMaxEventsInPrompt === undefined || config.llmMaxEventsInPrompt === '') ? 120 : Number(config.llmMaxEventsInPrompt)
     node.llmIncludeRaw = config.llmIncludeRaw !== undefined ? coerceBoolean(config.llmIncludeRaw) : false
     node.llmIncludeFlowContext = config.llmIncludeFlowContext !== undefined ? coerceBoolean(config.llmIncludeFlowContext) : true
@@ -4377,51 +4504,61 @@ module.exports = function (RED) {
       }, 90)
     }
-    const buildLLMPrompt = ({ question, summary }) => {
+    const buildLLMPrompt = ({ question, summary, compact = false } = {}) => {
+      const compactMode = compact === true
       const maxEventsRequested = Math.max(10, Number(node.llmMaxEventsInPrompt) || 120)
-      const maxEvents = Math.min(240, maxEventsRequested)
+      const maxEvents = Math.min(compactMode ? 80 : 240, maxEventsRequested)
       const recent = node._history.slice(-maxEvents)
       const wantsSvgChart = shouldGenerateSvgChart(question)
       const areasSnapshot = buildAreasSnapshot({ summary })
       const areasContext = buildAreasPromptContext(areasSnapshot)
       const summaryForPrompt = buildLlmSummarySnapshot(summary)
-      const summaryText = truncatePromptText(safeStringify(summaryForPrompt), 10000)
+      const summaryText = truncatePromptText(safeStringify(summaryForPrompt), compactMode ? 4000 : 10000)
       const lines = recent.map(t => {
         const payloadStr = normalizeValueForCompare(t.payload)
         const rawStr = (node.llmIncludeRaw && t.rawHex) ? ` raw=${t.rawHex}` : ''
         const devName = t.devicename ? ` (${t.devicename})` : ''
         return `${new Date(t.ts).toISOString()} ${t.event} ${t.source} -> ${t.destination}${devName} dpt=${t.dpt} payload=${payloadStr}${rawStr}`
       })
-      const recentLines = takeLastItemsByCharBudget(lines, 7000)
+      const recentLines = takeLastItemsByCharBudget(lines, compactMode ? 2600 : 7000)
       let flowContext = ''
       if (node.llmIncludeFlowContext) {
+        const flowMaxChars = compactMode ? 1400 : 5000
         const ttlMs = 10 * 1000
         const now = nowMs()
         if (node._flowContextCache && node._flowContextCache.text && (now - (node._flowContextCache.at || 0)) < ttlMs) {
           flowContext = node._flowContextCache.text
         } else {
-          flowContext = buildKnxUltimateFlowInventory({ maxNodes: Math.max(0, Number(node.llmMaxFlowNodesInPrompt) || 0) })
-          flowContext = truncatePromptText(flowContext, 5000)
+          const configuredMaxFlowNodes = Math.max(0, Number(node.llmMaxFlowNodesInPrompt) || 0)
+          const maxFlowNodes = compactMode
+            ? (configuredMaxFlowNodes > 0 ? Math.min(configuredMaxFlowNodes, 30) : 0)
+            : configuredMaxFlowNodes
+          flowContext = buildKnxUltimateFlowInventory({ maxNodes: maxFlowNodes })
+          flowContext = truncatePromptText(flowContext, flowMaxChars)
           node._flowContextCache = { at: now, text: flowContext }
         }
+        flowContext = truncatePromptText(flowContext, flowMaxChars)
       }
       let docsContext = ''
       if (node.llmIncludeDocsSnippets) {
+        const docsMaxCharsConfigured = Math.max(500, Math.min(5000, Number(node.llmDocsMaxChars) || 500))
+        const docsMaxChars = compactMode ? Math.min(docsMaxCharsConfigured, 1200) : docsMaxCharsConfigured
+        const docsMaxSnippetsConfigured = Math.max(1, Number(node.llmDocsMaxSnippets) || 1)
+        const docsMaxSnippets = compactMode ? Math.min(docsMaxSnippetsConfigured, 2) : docsMaxSnippetsConfigured
         const ttlMs = 30 * 1000
         const now = nowMs()
         const q = String(question || '').trim()
         if (node._docsContextCache && node._docsContextCache.text && node._docsContextCache.question === q && (now - (node._docsContextCache.at || 0)) < ttlMs) {
-          docsContext = node._docsContextCache.text
+          docsContext = truncatePromptText(node._docsContextCache.text, docsMaxChars)
         } else {
           const preferredLangDir = (node.llmDocsLanguage && node.llmDocsLanguage !== 'auto') ? node.llmDocsLanguage : ''
-          const docsMaxChars = Math.max(500, Math.min(5000, Number(node.llmDocsMaxChars) || 500))
           docsContext = buildRelevantDocsContext({
             moduleRootDir,
             question: q,
             preferredLangDir,
-            maxSnippets: Math.max(1, Number(node.llmDocsMaxSnippets) || 1),
+            maxSnippets: docsMaxSnippets,
             maxChars: docsMaxChars
           })
           docsContext = truncatePromptText(docsContext, docsMaxChars)
@@ -4997,13 +5134,15 @@ module.exports = function (RED) {
           translated: false
         }
       }
+      const jsonMaxTokens = Math.max(50000, Number(node.llmMaxTokens) || 0)
       const llmResponse = await callLLMChat({
         systemPrompt: [
           'You are a KNX installer assistant.',
           'Translate only human-readable KNX test labels.',
           'Return JSON only.'
         ].join(' '),
-        userContent: buildTestPlanTranslationPrompt({ language: targetLanguage, languageName: targetLanguageName, plan })
+        userContent: buildTestPlanTranslationPrompt({ language: targetLanguage, languageName: targetLanguageName, plan }),
+        maxTokensOverride: jsonMaxTokens
       })
       const parsed = extractJsonFragmentFromText(llmResponse.content)
       const translatedSteps = Array.isArray(parsed)
@@ -5045,13 +5184,15 @@ module.exports = function (RED) {
     const suggestGaRoleOverridesWithLlm = async ({ gaCatalog }) => {
       const candidates = (Array.isArray(gaCatalog) ? gaCatalog : []).filter(item => item && item.ga && isAmbiguousGaRoleSource(item.baseRoleSource || item.roleSource))
       if (!candidates.length) return {}
+      const jsonMaxTokens = Math.max(50000, Number(node.llmMaxTokens) || 0)
       const llmResponse = await callLLMChat({
         systemPrompt: [
           'You are a KNX installation modeling assistant.',
           'Classify KNX group addresses as command, status, or neutral for installers.',
           'Return JSON only.'
         ].join(' '),
-        userContent: buildGaRoleSuggestionPrompt({ gaCatalog: candidates })
+        userContent: buildGaRoleSuggestionPrompt({ gaCatalog: candidates }),
+        maxTokensOverride: jsonMaxTokens
       })
       const parsed = extractJsonFragmentFromText(llmResponse.content)
       const gaCatalogMap = new Map(candidates.map(item => [String(item.ga).trim(), item]))
@@ -5729,17 +5870,38 @@ module.exports = function (RED) {
       }
     }
+    node.deleteAllLlmAreas = async () => {
+      const currentOverrides = Object.assign({}, loadAreaOverrides())
+      const llmAreaIds = Object.keys(currentOverrides).filter(key => String(key || '').startsWith('llm:'))
+      llmAreaIds.forEach((areaId) => {
+        delete currentOverrides[areaId]
+      })
+      writeAreaOverrides(currentOverrides)
+      const summary = node._lastSummary || rebuildCachedSummaryNow()
+      return {
+        ok: true,
+        deletedCount: llmAreaIds.length,
+        areas: buildAreasSnapshot({ summary }),
+        profiles: buildProfilesSnapshot(),
+        actuatorTests: buildActuatorTestsSnapshot(),
+        testPlans: buildAiTestPlansSnapshot(),
+        gaCatalog: getGaCatalogSnapshot()
+      }
+    }
     node.regenerateLlmAreas = async () => {
       if (!node.llmEnabled) throw new Error('LLM is disabled in the KNX AI node config')
       const gaCatalog = getGaCatalogSnapshot()
       if (!Array.isArray(gaCatalog) || !gaCatalog.length) throw new Error('No ETS group addresses available')
+      const jsonMaxTokens = Math.max(50000, Number(node.llmMaxTokens) || 0)
       const llmResponse = await callLLMChat({
         systemPrompt: [
           'You are a KNX installation modeling assistant.',
           'Group KNX group addresses into practical installer-friendly operational areas.',
           'Return JSON only.'
         ].join(' '),
-        userContent: buildAreaRegenerationPrompt({ gaCatalog })
+        userContent: buildAreaRegenerationPrompt({ gaCatalog }),
+        maxTokensOverride: jsonMaxTokens
       })
       const parsed = extractJsonFragmentFromText(llmResponse.content)
       const rawAreas = Array.isArray(parsed)
@@ -5812,6 +5974,7 @@ module.exports = function (RED) {
       if (!installerPrompt) throw new Error('Missing prompt')
       const gaCatalog = getGaCatalogSnapshot()
       if (!Array.isArray(gaCatalog) || !gaCatalog.length) throw new Error('No ETS group addresses available')
+      const jsonMaxTokens = Math.max(50000, Number(node.llmMaxTokens) || 0)
       const llmResponse = await callLLMChat({
         systemPrompt: [
           'You are a KNX installation modeling assistant.',
@@ -5824,7 +5987,8 @@ module.exports = function (RED) {
           draftDescription: description,
           currentGaList: Array.isArray(gaList) ? gaList : [],
           gaCatalog
-        })
+        }),
+        maxTokensOverride: jsonMaxTokens
       })
       const parsed = extractJsonFragmentFromText(llmResponse.content)
       const gaCatalogMap = new Map(gaCatalog.map(item => [String(item && item.ga ? item.ga : '').trim(), item]))
@@ -6197,11 +6361,18 @@ module.exports = function (RED) {
       }
     }
-    const callLLMChat = async ({ systemPrompt, userContent, jsonSchema = null }) => {
+    const callLLMChat = async ({ systemPrompt, userContent, jsonSchema = null, maxTokensOverride = null }) => {
       if (!node.llmEnabled) throw new Error('LLM is disabled in node config')
       if (!node.llmApiKey && node.llmProvider !== 'ollama') {
         throw new Error('Missing API key: paste only the OpenAI key (starts with sk-), without "Bearer"')
       }
+    const maxTokensRaw = (maxTokensOverride !== null && maxTokensOverride !== undefined && maxTokensOverride !== '')
+      ? Number(maxTokensOverride)
+      : Number(node.llmMaxTokens)
+    const resolvedMaxTokens = Number.isFinite(maxTokensRaw) && maxTokensRaw > 0 ? Math.round(maxTokensRaw) : 10000
+    const configuredTimeoutMs = Number(node.llmTimeoutMs)
+    const resolvedTimeoutMs = Number.isFinite(configuredTimeoutMs) && configuredTimeoutMs > 0 ? Math.round(configuredTimeoutMs) : 30000
+    const effectiveTimeoutMs = Math.max(120000, resolvedTimeoutMs)
       if (node.llmProvider === 'ollama') {
         const url = node.llmBaseUrl || 'http://localhost:11434/api/chat'
@@ -6216,9 +6387,9 @@ module.exports = function (RED) {
             temperature: node.llmTemperature
           }
         }
-        const json = await postJson({ url, body, timeoutMs: node.llmTimeoutMs })
+        const json = await postJson({ url, body, timeoutMs: effectiveTimeoutMs })
         const content = json && json.message && typeof json.message.content === 'string' ? json.message.content : safeStringify(json)
-        return { provider: 'ollama', model: body.model, content }
+        return { provider: 'ollama', model: body.model, content, finishReason: String(json && json.done_reason ? json.done_reason : '') }
       }
       // Default: OpenAI-compatible chat/completions
@@ -6246,10 +6417,10 @@ module.exports = function (RED) {
       // Some OpenAI models (and some compatible gateways) require `max_completion_tokens` instead of `max_tokens`.
       // Try with `max_tokens` first for broad compatibility, then fallback once if the server rejects it.
-      const bodyWithMaxTokens = Object.assign({ max_tokens: node.llmMaxTokens }, schemaBody)
-      const bodyWithMaxCompletionTokens = Object.assign({ max_completion_tokens: node.llmMaxTokens }, schemaBody)
-      const plainBodyWithMaxTokens = Object.assign({ max_tokens: node.llmMaxTokens }, baseBody)
-      const plainBodyWithMaxCompletionTokens = Object.assign({ max_completion_tokens: node.llmMaxTokens }, baseBody)
+      const bodyWithMaxTokens = Object.assign({ max_tokens: resolvedMaxTokens }, schemaBody)
+      const bodyWithMaxCompletionTokens = Object.assign({ max_completion_tokens: resolvedMaxTokens }, schemaBody)
+      const plainBodyWithMaxTokens = Object.assign({ max_tokens: resolvedMaxTokens }, baseBody)
+      const plainBodyWithMaxCompletionTokens = Object.assign({ max_completion_tokens: resolvedMaxTokens }, baseBody)
       const isResponseFormatCompatibilityError = (message) => {
         const msg = String(message || '')
@@ -6261,32 +6432,33 @@ module.exports = function (RED) {
       let json
       try {
-        json = await postJson({ url, headers, body: bodyWithMaxTokens, timeoutMs: node.llmTimeoutMs })
+        json = await postJson({ url, headers, body: bodyWithMaxTokens, timeoutMs: effectiveTimeoutMs })
       } catch (error) {
         const msg = (error && error.message) ? String(error.message) : ''
         if (isResponseFormatCompatibilityError(msg)) {
           try {
-            json = await postJson({ url, headers, body: plainBodyWithMaxTokens, timeoutMs: node.llmTimeoutMs })
+            json = await postJson({ url, headers, body: plainBodyWithMaxTokens, timeoutMs: effectiveTimeoutMs })
           } catch (innerError) {
             const innerMsg = (innerError && innerError.message) ? String(innerError.message) : ''
             if (innerMsg.includes("Unsupported parameter: 'max_tokens'") || innerMsg.includes('max_completion_tokens')) {
-              json = await postJson({ url, headers, body: plainBodyWithMaxCompletionTokens, timeoutMs: node.llmTimeoutMs })
+              json = await postJson({ url, headers, body: plainBodyWithMaxCompletionTokens, timeoutMs: effectiveTimeoutMs })
             } else if (innerMsg.includes("Unsupported parameter: 'max_completion_tokens'")) {
-              json = await postJson({ url, headers, body: plainBodyWithMaxTokens, timeoutMs: node.llmTimeoutMs })
+              json = await postJson({ url, headers, body: plainBodyWithMaxTokens, timeoutMs: effectiveTimeoutMs })
             } else {
               throw innerError
             }
           }
         } else if (msg.includes("Unsupported parameter: 'max_tokens'") || msg.includes('max_completion_tokens')) {
-          json = await postJson({ url, headers, body: bodyWithMaxCompletionTokens, timeoutMs: node.llmTimeoutMs })
+          json = await postJson({ url, headers, body: bodyWithMaxCompletionTokens, timeoutMs: effectiveTimeoutMs })
         } else if (msg.includes("Unsupported parameter: 'max_completion_tokens'")) {
-          json = await postJson({ url, headers, body: bodyWithMaxTokens, timeoutMs: node.llmTimeoutMs })
+          json = await postJson({ url, headers, body: bodyWithMaxTokens, timeoutMs: effectiveTimeoutMs })
         } else {
           throw error
         }
       }
       const content = extractOpenAICompatText(json) || buildOpenAICompatFallbackText(json)
-      return { provider: 'openai_compat', model: baseBody.model, content }
+      const finishReason = String(json && json.choices && json.choices[0] && json.choices[0].finish_reason ? json.choices[0].finish_reason : '')
+      return { provider: 'openai_compat', model: baseBody.model, content, finishReason }
     }
     node.generateAiTestPlan = async ({ areaId, prompt, language } = {}) => {
@@ -6351,10 +6523,27 @@ module.exports = function (RED) {
     const callLLM = async ({ question }) => {
       const summary = rebuildCachedSummaryNow()
       const userContent = buildLLMPrompt({ question, summary })
-      const ret = await callLLMChat({
+      const configuredMaxTokens = Math.max(10000, Number(node.llmMaxTokens) || 0)
+      let ret = await callLLMChat({
         systemPrompt: node.llmSystemPrompt || '',
-        userContent
+        userContent,
+        maxTokensOverride: configuredMaxTokens
       })
+      const finishReason = String(ret && ret.finishReason ? ret.finishReason : '').trim().toLowerCase()
+      const lengthLimited = finishReason === 'length' || isOpenAICompatLengthFallbackText(ret && ret.content)
+      if (lengthLimited) {
+        const compactPrompt = buildLLMPrompt({ question, summary, compact: true })
+        const retryMaxTokens = Math.min(16000, Math.max(10000, Math.round(configuredMaxTokens * 1.25)))
+        try {
+          ret = await callLLMChat({
+            systemPrompt: node.llmSystemPrompt || '',
+            userContent: compactPrompt,
+            maxTokensOverride: retryMaxTokens
+          })
+        } catch (retryError) {
+          // Keep the first provider answer if retry fails.
+        }
+      }
       const finalContent = ensureSvgChartResponse({ question, summary, content: ret.content })
       return Object.assign({}, ret, { content: finalContent, summary })
     }