RubyGems - llm_meta_client - Versions diffs - 1.3.0 → 1.5.0 - Mend

llm_meta_client 1.3.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

data/lib/generators/llm_meta_client/scaffold/templates/app/javascript/controllers/message_stream_controller.js CHANGED Viewed

@@ -4,17 +4,20 @@ import { Controller } from "@hotwired/stimulus"
 // Opens an EventSource on connect, appends each delta to the content target,
 // closes on `done` / `error`.
 export default class extends Controller {
-  static targets = ["content"]
+  static targets = ["content", "cancelButton"]
   static values = { url: String }
   connect() {
     this.completed = false
+    this.cancelled = false
     this.source = new EventSource(this.urlValue)
     this.source.addEventListener("message", (e) => this.#onDelta(e))
     this.source.addEventListener("done", () => this.#onDone())
     this.source.addEventListener("title", (e) => this.#onTitle(e))
     this.source.addEventListener("saved", (e) => this.#onSaved(e))
     this.source.addEventListener("tool_calls", (e) => this.#onToolCalls(e))
+    this.source.addEventListener("thinking", (e) => this.#onThinking(e))
+    this.source.addEventListener("phase", (e) => this.#onPhase(e))
     this.source.addEventListener("error", (e) => this.#onError(e))
   }
@@ -26,10 +29,20 @@ export default class extends Controller {
     let delta
     try { delta = JSON.parse(event.data).delta } catch { return }
     if (!delta) return
+    // First content delta after a "thinking" phase: flip the role label so
+    // the user knows generation has actually started.
+    this.#exitThinkingPhase()
     this.contentTarget.append(delta)
     this.#scrollToBottom()
   }
+  #exitThinkingPhase() {
+    const role = this.element.querySelector(".message-role")
+    if (role && role.textContent.includes("Thinking")) {
+      role.textContent = "🤖 streaming…"
+    }
+  }
   #onTitle(event) {
     try {
       const data = JSON.parse(event.data)
@@ -48,9 +61,87 @@ export default class extends Controller {
       // markdown; remove the transient tool-call bubbles so reload and live look
       // the same.
       this.#removeTransientToolCallBubbles()
+      // Fold the live transient sections now (not on `done`) — `done` fires
+      // AFTER the synchronous title-generation round-trip, which can take
+      // several seconds and would leave the reasoning section open the
+      // whole time. Saved is the natural moment: the final message is
+      // already persisted and rendered.
+      this.#foldTransientSections()
     } catch {}
   }
+  // Thinking deltas (Ollama hybrid models with `think: true`) — rendered
+  // live in a <details> block above the assistant content. Ephemeral:
+  // the server never persists thinking, so this block won't reappear on
+  // page reload.
+  #onThinking(event) {
+    let delta
+    try { delta = JSON.parse(event.data).delta } catch { return }
+    if (!delta) return
+    const body = this.#thinkingContentEl()
+    body.append(delta)
+    // Keep the scroll glued to the bottom of the fixed-height thinking
+    // viewport so users see the latest reasoning as it streams.
+    body.scrollTop = body.scrollHeight
+    this.#scrollToBottom()
+  }
+  #thinkingContentEl() {
+    if (this._thinkingContent) return this._thinkingContent
+    const details = document.createElement("details")
+    // `thinking-active` triggers the dots animation on the summary while
+    // reasoning is in progress; it's removed by #foldTransientSections
+    // at end-of-stream. The body is fixed-height + scrollable while open
+    // (see chats.css) so a long reasoning trace doesn't dominate the
+    // screen even when expanded.
+    details.className = "message-thinking thinking-active"
+    // Open during streaming; #foldTransientSections collapses it once
+    // the assistant's final message is saved.
+    details.open = true
+    const summary = document.createElement("summary")
+    summary.textContent = "Reasoning"
+    // Three staggered-fade dots after "Reasoning". CSS handles the
+    // animation; the dots are hidden by default and revealed by the
+    // .thinking-active class on the wrapping <details>.
+    const dots = document.createElement("span")
+    dots.className = "thinking-dots"
+    for (let i = 0; i < 3; i += 1) {
+      const dot = document.createElement("span")
+      dot.textContent = "."
+      dots.appendChild(dot)
+    }
+    summary.appendChild(dots)
+    const body = document.createElement("div")
+    body.className = "message-thinking-content"
+    details.appendChild(summary)
+    details.appendChild(body)
+    this.contentTarget.parentNode.insertBefore(details, this.contentTarget)
+    this._thinkingContent = body
+    this._thinkingDetails = details
+    return body
+  }
+  // Collapse the transient streaming-only sections (thinking + live
+  // tool-call bubbles) so the assistant's final message gets the focus.
+  // Called from #onSaved, #onDone and cancel(). #onSaved also removes the
+  // live tool-call bubbles outright, so folding those bubbles matters
+  // mostly on the no-save (empty content) and cancel paths.
+  #foldTransientSections() {
+    if (this._thinkingDetails) {
+      this._thinkingDetails.open = false
+      // Stop the live "thinking…" animation now that streaming has finished.
+      this._thinkingDetails.classList.remove("thinking-active")
+    }
+    document.querySelectorAll(".tool-call-streaming details.tool-calls-section[open]")
+      .forEach((d) => { d.open = false })
+  }
+  #removeThinkingBlock() {
+    const el = this._thinkingContent?.parentNode
+    if (el && el.parentNode) el.parentNode.removeChild(el)
+    this._thinkingContent = null
+  }
   #onToolCalls(event) {
     try {
       const data = JSON.parse(event.data)
@@ -69,6 +160,21 @@ export default class extends Controller {
     document.querySelectorAll(".tool-call-streaming").forEach((el) => el.remove())
   }
+  // Phase events from the server signal what it's currently doing during the
+  // synchronous parts of a tool turn (model thinking, tool execution). The role
+  // label reflects the phase so users know progress is real, not a hang.
+  #onPhase(event) {
+    let name
+    try { name = JSON.parse(event.data).name } catch { return }
+    const role = this.element.querySelector(".message-role")
+    if (!role) return
+    const labels = {
+      thinking:    "🤔 Thinking…",
+      responding:  "🤖 streaming…",
+    }
+    if (labels[name]) role.textContent = labels[name]
+  }
   // Swap the streaming bubble's role + content with the host-rendered _message
   // partial output so any markdown / syntax highlighting / partial customizations
   // applied on reload also apply right after the stream finishes. We don't
@@ -92,13 +198,32 @@ export default class extends Controller {
   #onDone() {
     this.completed = true
+    this.#foldTransientSections()
+    this.#close()
+  }
+  // User clicked the cancel button. Closing the EventSource is enough to
+  // cascade cancellation upstream — the host's next stream write will raise
+  // ClientDisconnected, propagating cleanly all the way to the provider HTTP
+  // socket. The host's controller persists whatever partial content was
+  // forwarded so the bubble's content matches what's saved on reload.
+  cancel() {
+    if (this.completed || this.cancelled) return
+    this.cancelled = true
+    const role = this.element.querySelector(".message-role")
+    if (role) role.textContent = "🚫 cancelled"
+    this.element.classList.remove("streaming")
+    this.element.classList.add("cancelled")
+    if (this.hasCancelButtonTarget) this.cancelButtonTarget.remove()
+    this.#foldTransientSections()
     this.#close()
   }
   #onError(event) {
     // EventSource fires onerror whenever the connection closes — including
-    // immediately after a clean `event: done`. Suppress those.
-    if (this.completed) {
+    // immediately after a clean `event: done` or a user-initiated cancel.
+    // Suppress those.
+    if (this.completed || this.cancelled) {
       this.#close()
       return
     }

data/lib/generators/llm_meta_client/scaffold/templates/app/javascript/controllers/model_picker_controller.js ADDED Viewed

@@ -0,0 +1,160 @@
+import { Controller } from "@hotwired/stimulus"
+// Connects to data-controller="model-picker"
+//
+// Replaces the previous cascading family → api_key → model dropdowns
+// with a single mechanism that backs both:
+//   * the quick-picks row below the prompt (Default + favorites), and
+//   * the "Other models" grid panel.
+//
+// Every clickable element (`.quick-pick-button`, `.model-grid-cell`)
+// carries `data-family`, `data-api-key-uuid`, `data-model`, and
+// `data-supports-vision`. Clicking writes those values into hidden form
+// fields (`family`, `api_key_uuid`, `model`) so the chat form submits
+// exactly what the meta-server's API expects.
+//
+// On connect it keeps a model the server pre-populated in the hidden
+// field if there is one; otherwise it selects the user's default (the
+// .is-default button), else the first quick-pick, else the first grid cell.
+export default class extends Controller {
+  static targets = ["family", "apiKey", "model", "supportsVision", "quickPicks"]
+  connect() {
+    // Continuing-chat path: the server pre-populated hidden fields with the
+    // chat's most recent (family, api_key, model). Honor that so the picker
+    // opens on the same model the chat was last using — otherwise the user
+    // sees no indication of what their next prompt will hit. Falls through
+    // to #pickInitial for new chats (empty fields).
+    if (this.hasModelTarget && this.modelTarget.value) {
+      const match = this.element.querySelector(
+        `[data-model="${CSS.escape(this.modelTarget.value)}"]`
+      )
+      if (match) { this.#clickAsPick(match); return }
+      // The previously-used model is no longer in the visible options
+      // (removed from the catalog, key access lost, etc.). Re-apply from
+      // the hidden-field values directly so the form still submits
+      // cleanly; #ensureQuickPickFor synthesizes a transient pill.
+      this.#applySelection({
+        family:         this.hasFamilyTarget ? this.familyTarget.value : "",
+        apiKeyUuid:     this.hasApiKeyTarget ? this.apiKeyTarget.value : "",
+        model:          this.modelTarget.value,
+        supportsVision: this.modelTarget.dataset.supportsVision === "true",
+        label:          this.modelTarget.value
+      })
+      return
+    }
+    this.#pickInitial()
+  }
+  // Action: data-action="click->model-picker#pick"
+  pick(event) {
+    const el = event.currentTarget
+    this.#applySelection({
+      family:           el.dataset.family,
+      apiKeyUuid:       el.dataset.apiKeyUuid,
+      model:            el.dataset.model,
+      supportsVision:   el.dataset.supportsVision === "true",
+      label:            this.#labelFor(el),
+    })
+    // If the click came from inside the "Other models" panel, collapse
+    // it now that a selection has been made.
+    const panel = el.closest('.llm-toggle-panel')
+    if (panel && panel.style.display !== "none") {
+      const toggle = panel.closest('.llm-toggle-field')
+      const toggleButton = toggle?.querySelector('.llm-toggle-button')
+      toggleButton?.click()
+    }
+  }
+  #pickInitial() {
+    // Prefer the marked default if it's present in the quick-picks row.
+    const defaultBtn = this.element.querySelector(".quick-pick-button.is-default")
+    if (defaultBtn) { this.#clickAsPick(defaultBtn); return }
+    const firstQuick = this.element.querySelector(".quick-pick-button")
+    if (firstQuick) { this.#clickAsPick(firstQuick); return }
+    const firstCell = this.element.querySelector(".model-grid-cell")
+    if (firstCell) { this.#clickAsPick(firstCell) }
+  }
+  // Apply the same selection as a real click without the user-event
+  // side effect of collapsing the panel. Used from connect() and #pickInitial.
+  #clickAsPick(el) {
+    this.#applySelection({
+      family:           el.dataset.family,
+      apiKeyUuid:       el.dataset.apiKeyUuid,
+      model:            el.dataset.model,
+      supportsVision:   el.dataset.supportsVision === "true",
+      label:            this.#labelFor(el),
+    })
+  }
+  #applySelection({ family, apiKeyUuid, model, supportsVision, label }) {
+    if (this.hasFamilyTarget) this.familyTarget.value = family || ""
+    if (this.hasApiKeyTarget) this.apiKeyTarget.value = apiKeyUuid || ""
+    if (this.hasModelTarget) {
+      this.modelTarget.value = model || ""
+      // Expose supports_vision via a dataset attribute on the model
+      // input — chats_form_controller reads it to drive the attach
+      // button's enabled state (only vision-capable models accept images).
+      this.modelTarget.dataset.supportsVision = supportsVision ? "true" : "false"
+    }
+    // If the picked model has no representation in the quick-picks row
+    // (i.e. came from the grid and isn't a favorite / default), add a
+    // transient pill so the user always sees what's currently selected.
+    if (model) this.#ensureQuickPickFor({ family, apiKeyUuid, model, supportsVision, label })
+    // Mark the picked element(s) as selected; clear others. Match by
+    // data-model so any other button representing the same model (e.g.,
+    // the same favorite appearing in both the quick-picks row and the
+    // grid cell) also highlights.
+    this.element.querySelectorAll(".quick-pick-button.is-selected, .model-grid-cell.is-selected")
+      .forEach((el) => el.classList.remove("is-selected"))
+    if (model) {
+      this.element.querySelectorAll(`[data-model="${CSS.escape(model)}"]`)
+        .forEach((el) => el.classList.add("is-selected"))
+    }
+    // Tell chats-form to refresh the send button / attach button state.
+    this.dispatch("changed", { bubbles: true })
+  }
+  // Inject a transient `.quick-pick-button.is-transient` into the row
+  // when the picked model isn't already represented there. At most one
+  // transient pill exists at a time — it's replaced on each grid pick.
+  #ensureQuickPickFor({ family, apiKeyUuid, model, supportsVision, label }) {
+    if (!this.hasQuickPicksTarget) return
+    const existing = this.quickPicksTarget.querySelector(
+      `.quick-pick-button[data-model="${CSS.escape(model)}"]`
+    )
+    if (existing) {
+      // Already in the row (default or favorite). Drop any stale
+      // transient pill so we don't leave a duplicate of a previous pick.
+      this.quickPicksTarget.querySelectorAll(".quick-pick-button.is-transient")
+        .forEach((el) => el.remove())
+      return
+    }
+    // Otherwise rebuild the single transient slot.
+    this.quickPicksTarget.querySelectorAll(".quick-pick-button.is-transient")
+      .forEach((el) => el.remove())
+    const btn = document.createElement("button")
+    btn.type = "button"
+    btn.className = "quick-pick-button is-transient"
+    btn.title = model
+    btn.dataset.family = family || ""
+    btn.dataset.apiKeyUuid = apiKeyUuid || ""
+    btn.dataset.model = model
+    btn.dataset.supportsVision = supportsVision ? "true" : "false"
+    btn.dataset.action = `click->${this.identifier}#pick`
+    btn.textContent = label || model
+    this.quickPicksTarget.appendChild(btn)
+  }
+  #labelFor(el) {
+    // Grid cells wrap the name in `.model-grid-label`; quick-pick
+    // buttons just put text directly inside. Either way, trim it.
+    const gridLabel = el.querySelector?.(".model-grid-label")
+    return (gridLabel?.textContent || el.textContent || "").trim()
+  }
+}

data/lib/generators/llm_meta_client/scaffold/templates/app/javascript/controllers/tool_selector_controller.js CHANGED Viewed

@@ -25,13 +25,15 @@ export default class extends Controller {
     this.panelTarget.style.display = this.expanded ? "block" : "none"
     if (this.hasToggleIconTarget) {
-      this.toggleIconTarget.classList.toggle("bi-chevron-down", !this.expanded)
-      this.toggleIconTarget.classList.toggle("bi-chevron-up", this.expanded)
+      this.toggleIconTarget.classList.toggle("bi-chevron-up", !this.expanded)
+      this.toggleIconTarget.classList.toggle("bi-chevron-down", this.expanded)
     }
     if (this.expanded && this.mcpServers.length === 0) {
       this.#fetchMcpServers()
     }
+    if (this.expanded) this.dispatch("opened", { bubbles: true })
   }
   toggleServer(event) {
@@ -108,11 +110,17 @@ export default class extends Controller {
       const serverDiv = document.createElement("div")
       serverDiv.className = "mcp-server-item"
       const escapedUuid = this.#escapeAttr(server.uuid)
+      const sharedBadge = server.owned === false
+        ? `<span class="mcp-server-shared-badge" title="Shared by ${this.#escapeAttr(server.shared_by || "another user")} — verify before enabling tools you don't trust">
+             <i class="bi bi-people-fill"></i> ${this.#escapeHtml(server.shared_by || "shared")}
+           </span>`
+        : ""
       serverDiv.innerHTML = `
         <div class="mcp-server-header" data-action="click->tool-selector#toggleServer" data-server-uuid="${escapedUuid}">
           <i class="bi bi-chevron-right server-toggle-icon"></i>
           <i class="bi bi-server"></i>
           <span class="mcp-server-name">${this.#escapeHtml(server.name)}</span>
+          ${sharedBadge}
           ${server.tools && server.tools.length > 0 ? `<span class="tool-available-count">${server.tools.filter((t) => t.active).length} tools</span>` : ""}
         </div>
         <div class="mcp-server-tools" data-server-tools="${escapedUuid}" style="display: none;" data-loaded="${server.tools && server.tools.length > 0 ? "true" : "false"}">

data/lib/generators/llm_meta_client/scaffold/templates/app/models/chat.rb CHANGED Viewed

@@ -6,41 +6,22 @@ class Chat < ApplicationRecord
   before_create :set_uuid
-  # Find existing chat from session or create new one
-  class << self
-    def find_or_switch_for_session(session, current_user)
-      chat = find_by_session_chat_id(session, current_user)
-      return chat if chat.present?
-      chat = create!(user: current_user)
-      session[:chat_id] = chat.id
-      chat
-    end
-    private
-    def find_by_session_chat_id(session, current_user)
-      return nil unless session[:chat_id].present?
-      if current_user.present?
-        includes(:messages).find_by(id: session[:chat_id], user_id: current_user.id)
-      else
-        includes(:messages).find_by(id: session[:chat_id], user_id: nil)
-      end
-    end
-  end
   # Add a user message to the chat
-  def add_user_message(message, llm_uuid, model, branch_from_execution_id = nil)
+  def add_user_message(message, llm_uuid, model, branch_from_execution_id = nil, llm_platform: nil, image: nil)
     previous_id = if branch_from_execution_id.present?
       PromptNavigator::PromptExecution.find_by(execution_id: branch_from_execution_id)&.id
     else
       messages.where(role: "user").order(:created_at).last&.prompt_navigator_prompt_execution_id
     end
+    # Prepend the attached image as a data-URI markdown image so the saved
+    # prompt renders the image on reload, and so the streaming controller
+    # (reached over a GET EventSource) can recover the image from pe.prompt.
+    prompt_with_image = image.present? ? "![](data:#{image[:mime]};base64,#{image[:data_b64]})\n\n#{message}" : message
     prompt_execution = PromptNavigator::PromptExecution.create!(
-      prompt: message,
+      prompt: prompt_with_image,
       llm_uuid: llm_uuid,
       model: model,
+      llm_platform: llm_platform,
       configuration: "",
       previous_id: previous_id
     )
@@ -72,17 +53,41 @@ class Chat < ApplicationRecord
   # Returns the assembled content (with markdown "Tool calls" section appended
   # if tools fired). Caller is responsible for persistence.
   def stream_assistant_response(prompt_execution, jwt_token, tool_ids: [], generation_settings: {}, &block)
-    summarized_context, prompt = build_streaming_context(prompt_execution, jwt_token, with_tools: tool_ids.any?)
-    LlmMetaClient::ServerQuery.new.stream(
-      jwt_token,
-      prompt_execution.llm_uuid,
-      prompt_execution.model,
-      summarized_context,
-      prompt,
-      tool_ids: tool_ids,
-      generation_settings: generation_settings,
-      &block
-    )
+    last_msg = ordered_messages.last
+    pe = last_msg.prompt_navigator_prompt_execution
+    # Separate any attached image (data-URI markdown at the head of the
+    # prompt) from the text. The image flows as a structured field; the text
+    # goes through the usual prompt path.
+    text_prompt, attached_image = extract_attached_image(pe.prompt)
+    prompt = { role: last_msg.role, prompt: text_prompt }
+    if image_model?(prompt_execution.model)
+      image_context = pe.build_context(limit: Rails.configuration.summarize_conversation_count)
+      LlmMetaClient::ServerQuery.new.stream(
+        jwt_token,
+        prompt_execution.llm_uuid,
+        prompt_execution.model,
+        "",
+        prompt,
+        generation_settings: generation_settings,
+        image_context: image_context,
+        image: attached_image,
+        &block
+      )
+    else
+      summarized_context, prompt = build_streaming_context(prompt_execution, jwt_token, with_tools: tool_ids.any?)
+      LlmMetaClient::ServerQuery.new.stream(
+        jwt_token,
+        prompt_execution.llm_uuid,
+        prompt_execution.model,
+        summarized_context,
+        prompt,
+        tool_ids: tool_ids,
+        generation_settings: generation_settings,
+        image: attached_image,
+        &block
+      )
+    end
   end
   # Persist the streamed assistant response. Skips persistence if content is blank.
@@ -90,7 +95,7 @@ class Chat < ApplicationRecord
     return nil if content.blank?
     prompt_execution.update!(
-      llm_platform: resolve_llm_type(prompt_execution.llm_uuid, jwt_token),
+      llm_platform: prompt_execution.llm_platform.presence || resolve_llm_type(prompt_execution.llm_uuid, jwt_token),
       response: content
     )
     messages.create!(
@@ -127,16 +132,40 @@ class Chat < ApplicationRecord
   # Summarize the user's prompt into a short title via LLM (required by ChatManager::TitleGeneratable)
   def summarize_for_title(prompt_text, jwt_token)
+    # Strip any attached data-URI image from the prompt before titling; the
+    # summarizer shouldn't see the image markdown (it leads to titles like
+    # "Undefined Image" derived from the empty alt text).
+    text_only, _image = extract_attached_image(prompt_text)
+    return nil if text_only.blank?
     latest_pe = ordered_by_descending_prompt_executions.first
     return nil unless latest_pe&.llm_uuid && latest_pe&.model
-    LlmMetaClient::ServerQuery.new.call(
+    raw = LlmMetaClient::ServerQuery.new.call(
       jwt_token,
       latest_pe.llm_uuid,
       latest_pe.model,
       "No context available.",
-      { role: "user", prompt: "Please summarize the following text into a short title (max 50 characters). Respond with only the title, nothing else: #{prompt_text}" }
+      { role: "user", prompt: "Please summarize the following text into a short title (max 50 characters). Respond with only the title, nothing else: #{text_only}" }
     )
+    strip_title_markdown(raw)
+  end
+  # LLMs frequently wrap titles in markdown emphasis (**bold**, *italic*),
+  # backtick-code, leading "# heading" marks, or surrounding quotes —
+  # sometimes despite explicit instructions to return plain text. Strip
+  # those artifacts so the chat sidebar shows a clean title.
+  def strip_title_markdown(text)
+    text.to_s
+        .gsub(/\A\s*#+\s*/, "")                          # leading "# "
+        .gsub(/`([^`]+)`/, '\1')                          # `inline code`
+        .gsub(/\*\*\*([^\*]+)\*\*\*/, '\1')               # ***triple***
+        .gsub(/\*\*([^\*]+)\*\*/, '\1')                   # **bold**
+        .gsub(/\*([^\*]+)\*/, '\1')                       # *italic*
+        .gsub(/__([^_]+)__/, '\1')                        # __bold__
+        .gsub(/_([^_]+)_/, '\1')                          # _italic_
+        .gsub(/\A["'“”‘’「『]+|["'“”‘’」』]+\z/, "")  # wrapping quotes
+        .strip
   end
   # Set a new UUID
@@ -167,16 +196,35 @@ class Chat < ApplicationRecord
     last_msg = ordered_messages.last
     pe = last_msg.prompt_navigator_prompt_execution
     prompt = { role: last_msg.role, prompt: pe.prompt }
-    context = pe.build_context(limit: Rails.configuration.summarize_conversation_count)
+    # Image-generation models don't take prior context. Summarizing through
+    # an image model would just generate an image as the "summary".
+    if image_model?(prompt_execution.model)
+      return [ "No context available.", prompt ]
+    end
+    verbatim_count = Rails.configuration.summarize_conversation_count
+    context = pe.build_context(limit: verbatim_count * 4)
     summarized_context =
       if context.empty?
         "No context available."
+      elsif context.size <= verbatim_count
+        # Within budget: replay recent turns verbatim, no summarization call.
+        format_transcript(context)
       else
-        LlmMetaClient::ServerQuery.new.call(
-          jwt_token, prompt_execution.llm_uuid, prompt_execution.model,
-          context, "Please summarize the context"
+        # Overflow: summarize the older slice, keep recent turns verbatim.
+        # Summarization runs on a cheap fixed model, not the user's selected
+        # one. Falls back to the user's model if it isn't available.
+        older = context[0...-verbatim_count]
+        recent = context.last(verbatim_count)
+        sum_uuid, sum_model =
+          summarization_target(llm_options) || [ prompt_execution.llm_uuid, prompt_execution.model ]
+        summary = LlmMetaClient::ServerQuery.new.call(
+          jwt_token, sum_uuid, sum_model,
+          older, "Please summarize the context"
         )
+        "Summary of earlier conversation: #{summary}\n\nRecent conversation:\n#{format_transcript(recent)}"
       end
     summarized_context += "Additional prompt: Responses from the assistant must consist solely of the response body."
     if with_tools
@@ -185,4 +233,42 @@ class Chat < ApplicationRecord
     [ summarized_context, prompt ]
   end
+  def image_model?(model_meta_id)
+    model_meta_id.to_s.include?("image")
+  end
+  def format_transcript(turns)
+    turns.map { |t| "User: #{t[:prompt]}\nAssistant: #{t[:response]}" }.join("\n\n")
+  end
+  # Pull a single leading `![](data:mime;base64,DATA)` image out of the prompt
+  # text. Returns [text_without_image, {mime:, data_b64:}|nil]. v1 supports a
+  # single image per turn.
+  ATTACHED_IMAGE_HEAD = /\A!\[[^\]]*\]\(data:([^;]+);base64,([^\)]+)\)\s*\n*/m
+  def extract_attached_image(prompt_text)
+    m = prompt_text.to_s.match(ATTACHED_IMAGE_HEAD)
+    return [ prompt_text.to_s, nil ] unless m
+    stripped = prompt_text.sub(ATTACHED_IMAGE_HEAD, "")
+    [ stripped, { mime: m[1], data_b64: m[2] } ]
+  end
+  # Cheap model used to condense overflow context. Configured via
+  # Rails.configuration.summarization_model (env LLM_SUMMARIZATION_MODEL
+  # or credentials[:llm_service][:summarization_model]). If the configured
+  # meta_id isn't in the ollama family's catalog at request time, the
+  # caller falls back to the user's selected model.
+  def summarization_target(llm_options)
+    ollama = llm_options.find { |o| o[:llm_type] == "ollama" }
+    return nil unless ollama
+    target = Rails.configuration.summarization_model
+    return nil unless target.present?
+    models = ollama[:available_models] || []
+    available = models.any? { |m| (m["value"] || m[:value]) == target }
+    return nil unless available
+    [ ollama[:uuid], target ]
+  end
 end

data/lib/generators/llm_meta_client/scaffold/templates/app/views/chats/_chat_sidebar.html.erb CHANGED Viewed

@@ -3,6 +3,8 @@
     ->(id) { chat_path(id) },
     active_uuid: chat&.uuid,
     download_csv_path: ->(id) { download_csv_chat_path(id) },
-    download_all_csv_path: download_all_csv_chats_path
+    delete_path: ->(id) { chat_path(id) },
+    batch_delete_path: batch_destroy_chats_path,
+    batch_download_csv_path: download_selected_csv_chats_path
   ) %>
 </div>

data/lib/generators/llm_meta_client/scaffold/templates/app/views/chats/_message.html.erb CHANGED Viewed

@@ -8,7 +8,9 @@
   </div>
   <div class="message-content">
     <%% if message.role == 'user' %>
-      <%%= simple_format(message.prompt_navigator_prompt_execution&.prompt) %>
+      <%% img, text = split_attached_image_html(message.prompt_navigator_prompt_execution&.prompt) %>
+      <%%= img if img %>
+      <%%= simple_format(text) %>
     <%% else %>
       <%%= simple_format(message.prompt_navigator_prompt_execution&.response) %>
     <%% end %>

data/lib/generators/llm_meta_client/scaffold/templates/app/views/chats/_streaming_message.html.erb CHANGED Viewed

@@ -7,6 +7,12 @@
 <div class="message assistant streaming"
      data-controller="message-stream"
      data-message-stream-url-value="<%%= stream_url %>">
+  <button type="button" class="stream-cancel-button"
+          title="Cancel"
+          data-message-stream-target="cancelButton"
+          data-action="click->message-stream#cancel">
+    <i class="bi bi-x-circle"></i>
+  </button>
   <div class="message-role">🤖 streaming…</div>
   <div class="message-content" data-message-stream-target="content"></div>
 </div>