llm_meta_client 1.3.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +38 -0
  3. data/lib/generators/llm_meta_client/scaffold/scaffold_generator.rb +12 -7
  4. data/lib/generators/llm_meta_client/scaffold/templates/app/controllers/api/mcp_servers_controller.rb +2 -2
  5. data/lib/generators/llm_meta_client/scaffold/templates/app/controllers/chat_streams_controller.rb +24 -2
  6. data/lib/generators/llm_meta_client/scaffold/templates/app/controllers/chats_controller.rb +92 -76
  7. data/lib/generators/llm_meta_client/scaffold/templates/app/controllers/prompts_controller.rb +28 -1
  8. data/lib/generators/llm_meta_client/scaffold/templates/app/javascript/controllers/asset_actions_controller.js +98 -0
  9. data/lib/generators/llm_meta_client/scaffold/templates/app/javascript/controllers/chat_controller.js +126 -0
  10. data/lib/generators/llm_meta_client/scaffold/templates/app/javascript/controllers/chat_menu_controller.js +42 -0
  11. data/lib/generators/llm_meta_client/scaffold/templates/app/javascript/controllers/chat_title_edit_controller.js +5 -0
  12. data/lib/generators/llm_meta_client/scaffold/templates/app/javascript/controllers/chats_form_controller.js +186 -12
  13. data/lib/generators/llm_meta_client/scaffold/templates/app/javascript/controllers/generation_settings_controller.js +38 -20
  14. data/lib/generators/llm_meta_client/scaffold/templates/app/javascript/controllers/input_controls_controller.js +55 -0
  15. data/lib/generators/llm_meta_client/scaffold/templates/app/javascript/controllers/llm_toggle_controller.js +27 -0
  16. data/lib/generators/llm_meta_client/scaffold/templates/app/javascript/controllers/message_stream_controller.js +128 -3
  17. data/lib/generators/llm_meta_client/scaffold/templates/app/javascript/controllers/model_picker_controller.js +160 -0
  18. data/lib/generators/llm_meta_client/scaffold/templates/app/javascript/controllers/tool_selector_controller.js +10 -2
  19. data/lib/generators/llm_meta_client/scaffold/templates/app/models/chat.rb +130 -44
  20. data/lib/generators/llm_meta_client/scaffold/templates/app/views/chats/_chat_sidebar.html.erb +3 -1
  21. data/lib/generators/llm_meta_client/scaffold/templates/app/views/chats/_message.html.erb +3 -1
  22. data/lib/generators/llm_meta_client/scaffold/templates/app/views/chats/_streaming_message.html.erb +6 -0
  23. data/lib/generators/llm_meta_client/scaffold/templates/app/views/chats/_tool_call_message.html.erb +20 -18
  24. data/lib/generators/llm_meta_client/scaffold/templates/app/views/chats/create.turbo_stream.erb +31 -0
  25. data/lib/generators/llm_meta_client/scaffold/templates/app/views/chats/destroy.turbo_stream.erb +3 -0
  26. data/lib/generators/llm_meta_client/scaffold/templates/app/views/chats/edit.html.erb +53 -17
  27. data/lib/generators/llm_meta_client/scaffold/templates/app/views/chats/new.html.erb +50 -17
  28. data/lib/generators/llm_meta_client/scaffold/templates/app/views/layouts/_header.html.erb +1 -5
  29. data/lib/generators/llm_meta_client/scaffold/templates/app/views/layouts/_new_chat_button.html.erb +7 -0
  30. data/lib/generators/llm_meta_client/scaffold/templates/app/views/layouts/application.html.erb +2 -2
  31. data/lib/generators/llm_meta_client/scaffold/templates/app/views/shared/_generation_settings_field.html.erb +7 -5
  32. data/lib/generators/llm_meta_client/scaffold/templates/app/views/shared/_model_grid.html.erb +88 -0
  33. data/lib/generators/llm_meta_client/scaffold/templates/app/views/shared/_quick_picks.html.erb +67 -0
  34. data/lib/generators/llm_meta_client/scaffold/templates/app/views/shared/_tool_selector_field.html.erb +1 -1
  35. data/lib/llm_meta_client/helpers.rb +18 -0
  36. data/lib/llm_meta_client/server_query.rb +24 -6
  37. data/lib/llm_meta_client/version.rb +1 -1
  38. metadata +11 -6
  39. data/lib/generators/llm_meta_client/scaffold/templates/app/javascript/controllers/llm_selector_controller.js +0 -236
  40. data/lib/generators/llm_meta_client/scaffold/templates/app/views/chats/update.turbo_stream.erb +0 -85
  41. data/lib/generators/llm_meta_client/scaffold/templates/app/views/shared/_api_key_field.html.erb +0 -15
  42. data/lib/generators/llm_meta_client/scaffold/templates/app/views/shared/_family_field.html.erb +0 -18
  43. data/lib/generators/llm_meta_client/scaffold/templates/app/views/shared/_model_field.html.erb +0 -12
@@ -4,17 +4,20 @@ import { Controller } from "@hotwired/stimulus"
4
4
  // Opens an EventSource on connect, appends each delta to the content target,
5
5
  // closes on `done` / `error`.
6
6
  export default class extends Controller {
7
- static targets = ["content"]
7
+ static targets = ["content", "cancelButton"]
8
8
  static values = { url: String }
9
9
 
10
10
  connect() {
11
11
  this.completed = false
12
+ this.cancelled = false
12
13
  this.source = new EventSource(this.urlValue)
13
14
  this.source.addEventListener("message", (e) => this.#onDelta(e))
14
15
  this.source.addEventListener("done", () => this.#onDone())
15
16
  this.source.addEventListener("title", (e) => this.#onTitle(e))
16
17
  this.source.addEventListener("saved", (e) => this.#onSaved(e))
17
18
  this.source.addEventListener("tool_calls", (e) => this.#onToolCalls(e))
19
+ this.source.addEventListener("thinking", (e) => this.#onThinking(e))
20
+ this.source.addEventListener("phase", (e) => this.#onPhase(e))
18
21
  this.source.addEventListener("error", (e) => this.#onError(e))
19
22
  }
20
23
 
@@ -26,10 +29,20 @@ export default class extends Controller {
26
29
  let delta
27
30
  try { delta = JSON.parse(event.data).delta } catch { return }
28
31
  if (!delta) return
32
+ // First content delta after a "thinking" phase: flip the role label so
33
+ // the user knows generation has actually started.
34
+ this.#exitThinkingPhase()
29
35
  this.contentTarget.append(delta)
30
36
  this.#scrollToBottom()
31
37
  }
32
38
 
39
+ #exitThinkingPhase() {
40
+ const role = this.element.querySelector(".message-role")
41
+ if (role && role.textContent.includes("Thinking")) {
42
+ role.textContent = "πŸ€– streaming…"
43
+ }
44
+ }
45
+
33
46
  #onTitle(event) {
34
47
  try {
35
48
  const data = JSON.parse(event.data)
@@ -48,9 +61,87 @@ export default class extends Controller {
48
61
  // markdown; remove the transient tool-call bubbles so reload and live look
49
62
  // the same.
50
63
  this.#removeTransientToolCallBubbles()
64
+ // Fold the live transient sections now (not on `done`) — `done` fires
65
+ // AFTER the synchronous title-generation round-trip, which can take
66
+ // several seconds and would leave the reasoning section open the
67
+ // whole time. Saved is the natural moment: the final message is
68
+ // already persisted and rendered.
69
+ this.#foldTransientSections()
51
70
  } catch {}
52
71
  }
53
72
 
73
+ // Thinking deltas (Ollama hybrid models with `think: true`) — rendered
74
+ // live in a <details> block above the assistant content. Ephemeral:
75
+ // the server never persists thinking, so this block won't reappear on
76
+ // page reload.
77
+ #onThinking(event) {
78
+ let delta
79
+ try { delta = JSON.parse(event.data).delta } catch { return }
80
+ if (!delta) return
81
+ const body = this.#thinkingContentEl()
82
+ body.append(delta)
83
+ // Keep the scroll glued to the bottom of the fixed-height thinking
84
+ // viewport so users see the latest reasoning as it streams.
85
+ body.scrollTop = body.scrollHeight
86
+ this.#scrollToBottom()
87
+ }
88
+
89
+ #thinkingContentEl() {
90
+ if (this._thinkingContent) return this._thinkingContent
91
+ const details = document.createElement("details")
92
+ // `thinking-active` triggers the dots animation on the summary while
93
+ // reasoning is in progress; it's removed by #foldTransientSections
94
+ // at end-of-stream. The body is fixed-height + scrollable while open
95
+ // (see chats.css) so a long reasoning trace doesn't dominate the
96
+ // screen even when expanded.
97
+ details.className = "message-thinking thinking-active"
98
+ // Open during streaming; #foldTransientSections collapses it once
99
+ // the assistant's final message is saved.
100
+ details.open = true
101
+ const summary = document.createElement("summary")
102
+ summary.textContent = "Reasoning"
103
+ // Three staggered-fade dots after "Reasoning". CSS handles the
104
+ // animation; the dots are hidden by default and revealed by the
105
+ // .thinking-active class on the wrapping <details>.
106
+ const dots = document.createElement("span")
107
+ dots.className = "thinking-dots"
108
+ for (let i = 0; i < 3; i += 1) {
109
+ const dot = document.createElement("span")
110
+ dot.textContent = "."
111
+ dots.appendChild(dot)
112
+ }
113
+ summary.appendChild(dots)
114
+ const body = document.createElement("div")
115
+ body.className = "message-thinking-content"
116
+ details.appendChild(summary)
117
+ details.appendChild(body)
118
+ this.contentTarget.parentNode.insertBefore(details, this.contentTarget)
119
+ this._thinkingContent = body
120
+ this._thinkingDetails = details
121
+ return body
122
+ }
123
+
124
+ // Collapse the transient streaming-only sections (thinking + live
125
+ // tool-call bubbles) so the assistant's final message gets the focus.
126
+ // Called from #onDone and cancel(); #onSaved goes further and removes
127
+ // the live tool-call bubbles outright, so this is mostly cosmetic for
128
+ // the no-save (empty content) and cancel paths.
129
+ #foldTransientSections() {
130
+ if (this._thinkingDetails) {
131
+ this._thinkingDetails.open = false
132
+ // Stop the live "thinking…" animation now that streaming has finished.
133
+ this._thinkingDetails.classList.remove("thinking-active")
134
+ }
135
+ document.querySelectorAll(".tool-call-streaming details.tool-calls-section[open]")
136
+ .forEach((d) => { d.open = false })
137
+ }
138
+
139
+ #removeThinkingBlock() {
140
+ const el = this._thinkingContent?.parentNode
141
+ if (el && el.parentNode) el.parentNode.removeChild(el)
142
+ this._thinkingContent = null
143
+ }
144
+
54
145
  #onToolCalls(event) {
55
146
  try {
56
147
  const data = JSON.parse(event.data)
@@ -69,6 +160,21 @@ export default class extends Controller {
69
160
  document.querySelectorAll(".tool-call-streaming").forEach((el) => el.remove())
70
161
  }
71
162
 
163
+ // Phase events from the server signal what it's currently doing during the
164
+ // synchronous parts of a tool turn (model thinking, tool execution). The role
165
+ // label reflects the phase so users know progress is real, not a hang.
166
+ #onPhase(event) {
167
+ let name
168
+ try { name = JSON.parse(event.data).name } catch { return }
169
+ const role = this.element.querySelector(".message-role")
170
+ if (!role) return
171
+ const labels = {
172
+ thinking: "πŸ€” Thinking…",
173
+ responding: "πŸ€– streaming…",
174
+ }
175
+ if (labels[name]) role.textContent = labels[name]
176
+ }
177
+
72
178
  // Swap the streaming bubble's role + content with the host-rendered _message
73
179
  // partial output so any markdown / syntax highlighting / partial customizations
74
180
  // applied on reload also apply right after the stream finishes. We don't
@@ -92,13 +198,32 @@ export default class extends Controller {
92
198
 
93
199
  #onDone() {
94
200
  this.completed = true
201
+ this.#foldTransientSections()
202
+ this.#close()
203
+ }
204
+
205
+ // User clicked the cancel button. Closing the EventSource is enough to
206
+ // cascade cancellation upstream — the host's next stream write will raise
207
+ // ClientDisconnected, propagating cleanly all the way to the provider HTTP
208
+ // socket. The host's controller persists whatever partial content was
209
+ // forwarded so the bubble's content matches what's saved on reload.
210
+ cancel() {
211
+ if (this.completed || this.cancelled) return
212
+ this.cancelled = true
213
+ const role = this.element.querySelector(".message-role")
214
+ if (role) role.textContent = "🚫 cancelled"
215
+ this.element.classList.remove("streaming")
216
+ this.element.classList.add("cancelled")
217
+ if (this.hasCancelButtonTarget) this.cancelButtonTarget.remove()
218
+ this.#foldTransientSections()
95
219
  this.#close()
96
220
  }
97
221
 
98
222
  #onError(event) {
99
223
  // EventSource fires onerror whenever the connection closes — including
100
- // immediately after a clean `event: done`. Suppress those.
101
- if (this.completed) {
224
+ // immediately after a clean `event: done` or a user-initiated cancel.
225
+ // Suppress those.
226
+ if (this.completed || this.cancelled) {
102
227
  this.#close()
103
228
  return
104
229
  }
@@ -0,0 +1,160 @@
1
+ import { Controller } from "@hotwired/stimulus"
2
+
3
+ // Connects to data-controller="model-picker"
4
+ //
5
+ // Replaces the previous cascading family → api_key → model dropdowns
6
+ // with a single mechanism that backs both:
7
+ // * the quick-picks row below the prompt (Default + favorites), and
8
+ // * the "Other models" grid panel.
9
+ //
10
+ // Every clickable element (`.quick-pick-button`, `.model-grid-cell`)
11
+ // carries `data-family`, `data-api-key-uuid`, `data-model`, and
12
+ // `data-supports-vision`. Clicking writes those values into hidden form
13
+ // fields (`family`, `api_key_uuid`, `model`) so the chat form submits
14
+ // exactly what the meta-server's API expects.
15
+ //
16
+ // On connect it picks a sensible initial selection: the user's default
17
+ // (the .is-default button) if present, otherwise the first quick-pick,
18
+ // otherwise the first grid cell.
19
+ export default class extends Controller {
20
+ static targets = ["family", "apiKey", "model", "supportsVision", "quickPicks"]
21
+
22
+ connect() {
23
+ // Continuing-chat path: the server pre-populated hidden fields with the
24
+ // chat's most recent (family, api_key, model). Honor that so the picker
25
+ // opens on the same model the chat was last using — otherwise the user
26
+ // sees no indication of what their next prompt will hit. Falls through
27
+ // to #pickInitial for new chats (empty fields).
28
+ if (this.hasModelTarget && this.modelTarget.value) {
29
+ const match = this.element.querySelector(
30
+ `[data-model="${CSS.escape(this.modelTarget.value)}"]`
31
+ )
32
+ if (match) { this.#clickAsPick(match); return }
33
+ // The previously-used model is no longer in the visible options
34
+ // (removed from the catalog, key access lost, etc.). Re-apply from
35
+ // the hidden-field values directly so the form still submits
36
+ // cleanly; #ensureQuickPickFor synthesizes a transient pill.
37
+ this.#applySelection({
38
+ family: this.hasFamilyTarget ? this.familyTarget.value : "",
39
+ apiKeyUuid: this.hasApiKeyTarget ? this.apiKeyTarget.value : "",
40
+ model: this.modelTarget.value,
41
+ supportsVision: this.modelTarget.dataset.supportsVision === "true",
42
+ label: this.modelTarget.value
43
+ })
44
+ return
45
+ }
46
+ this.#pickInitial()
47
+ }
48
+
49
+ // Action: data-action="click->model-picker#pick"
50
+ pick(event) {
51
+ const el = event.currentTarget
52
+ this.#applySelection({
53
+ family: el.dataset.family,
54
+ apiKeyUuid: el.dataset.apiKeyUuid,
55
+ model: el.dataset.model,
56
+ supportsVision: el.dataset.supportsVision === "true",
57
+ label: this.#labelFor(el),
58
+ })
59
+
60
+ // If the click came from inside the "Other models" panel, collapse
61
+ // it now that a selection has been made.
62
+ const panel = el.closest('.llm-toggle-panel')
63
+ if (panel && panel.style.display !== "none") {
64
+ const toggle = panel.closest('.llm-toggle-field')
65
+ const toggleButton = toggle?.querySelector('.llm-toggle-button')
66
+ toggleButton?.click()
67
+ }
68
+ }
69
+
70
+ #pickInitial() {
71
+ // Prefer the marked default if it's present in the quick-picks row.
72
+ const defaultBtn = this.element.querySelector(".quick-pick-button.is-default")
73
+ if (defaultBtn) { this.#clickAsPick(defaultBtn); return }
74
+ const firstQuick = this.element.querySelector(".quick-pick-button")
75
+ if (firstQuick) { this.#clickAsPick(firstQuick); return }
76
+ const firstCell = this.element.querySelector(".model-grid-cell")
77
+ if (firstCell) { this.#clickAsPick(firstCell) }
78
+ }
79
+
80
+ // Apply the same logic as a real click without firing the user-event
81
+ // side effects (collapsing the panel). Used at #connect time.
82
+ #clickAsPick(el) {
83
+ this.#applySelection({
84
+ family: el.dataset.family,
85
+ apiKeyUuid: el.dataset.apiKeyUuid,
86
+ model: el.dataset.model,
87
+ supportsVision: el.dataset.supportsVision === "true",
88
+ label: this.#labelFor(el),
89
+ })
90
+ }
91
+
92
+ #applySelection({ family, apiKeyUuid, model, supportsVision, label }) {
93
+ if (this.hasFamilyTarget) this.familyTarget.value = family || ""
94
+ if (this.hasApiKeyTarget) this.apiKeyTarget.value = apiKeyUuid || ""
95
+ if (this.hasModelTarget) {
96
+ this.modelTarget.value = model || ""
97
+ // Expose supports_vision via a dataset attribute on the model
98
+ // input — chats_form_controller reads it to drive the attach
99
+ // button's enabled state (vision-only models accept images).
100
+ this.modelTarget.dataset.supportsVision = supportsVision ? "true" : "false"
101
+ }
102
+
103
+ // If the picked model has no representation in the quick-picks row
104
+ // (i.e. came from the grid and isn't a favorite / default), add a
105
+ // transient pill so the user always sees what's currently selected.
106
+ if (model) this.#ensureQuickPickFor({ family, apiKeyUuid, model, supportsVision, label })
107
+
108
+ // Mark the picked element(s) as selected; clear others. Match by
109
+ // data-model so any other button representing the same model (e.g.,
110
+ // the same favorite appearing in both the quick-picks row and the
111
+ // grid cell) also highlights.
112
+ this.element.querySelectorAll(".quick-pick-button.is-selected, .model-grid-cell.is-selected")
113
+ .forEach((el) => el.classList.remove("is-selected"))
114
+ if (model) {
115
+ this.element.querySelectorAll(`[data-model="${CSS.escape(model)}"]`)
116
+ .forEach((el) => el.classList.add("is-selected"))
117
+ }
118
+
119
+ // Tell chats-form to refresh the send button / attach button state.
120
+ this.dispatch("changed", { bubbles: true })
121
+ }
122
+
123
+ // Inject a transient `.quick-pick-button.is-transient` into the row
124
+ // when the picked model isn't already represented there. At most one
125
+ // transient pill exists at a time — it's replaced on each grid pick.
126
+ #ensureQuickPickFor({ family, apiKeyUuid, model, supportsVision, label }) {
127
+ if (!this.hasQuickPicksTarget) return
128
+ const existing = this.quickPicksTarget.querySelector(
129
+ `.quick-pick-button[data-model="${CSS.escape(model)}"]`
130
+ )
131
+ if (existing) {
132
+ // Already in the row (default or favorite). Drop any stale
133
+ // transient pill so we don't leave a duplicate of a previous pick.
134
+ this.quickPicksTarget.querySelectorAll(".quick-pick-button.is-transient")
135
+ .forEach((el) => el.remove())
136
+ return
137
+ }
138
+ // Otherwise rebuild the single transient slot.
139
+ this.quickPicksTarget.querySelectorAll(".quick-pick-button.is-transient")
140
+ .forEach((el) => el.remove())
141
+ const btn = document.createElement("button")
142
+ btn.type = "button"
143
+ btn.className = "quick-pick-button is-transient"
144
+ btn.title = model
145
+ btn.dataset.family = family || ""
146
+ btn.dataset.apiKeyUuid = apiKeyUuid || ""
147
+ btn.dataset.model = model
148
+ btn.dataset.supportsVision = supportsVision ? "true" : "false"
149
+ btn.dataset.action = `click->${this.identifier}#pick`
150
+ btn.textContent = label || model
151
+ this.quickPicksTarget.appendChild(btn)
152
+ }
153
+
154
+ #labelFor(el) {
155
+ // Grid cells wrap the name in `.model-grid-label`; quick-pick
156
+ // buttons just put text directly inside. Either way, trim it.
157
+ const gridLabel = el.querySelector?.(".model-grid-label")
158
+ return (gridLabel?.textContent || el.textContent || "").trim()
159
+ }
160
+ }
@@ -25,13 +25,15 @@ export default class extends Controller {
25
25
  this.panelTarget.style.display = this.expanded ? "block" : "none"
26
26
 
27
27
  if (this.hasToggleIconTarget) {
28
- this.toggleIconTarget.classList.toggle("bi-chevron-down", !this.expanded)
29
- this.toggleIconTarget.classList.toggle("bi-chevron-up", this.expanded)
28
+ this.toggleIconTarget.classList.toggle("bi-chevron-up", !this.expanded)
29
+ this.toggleIconTarget.classList.toggle("bi-chevron-down", this.expanded)
30
30
  }
31
31
 
32
32
  if (this.expanded && this.mcpServers.length === 0) {
33
33
  this.#fetchMcpServers()
34
34
  }
35
+
36
+ if (this.expanded) this.dispatch("opened", { bubbles: true })
35
37
  }
36
38
 
37
39
  toggleServer(event) {
@@ -108,11 +110,17 @@ export default class extends Controller {
108
110
  const serverDiv = document.createElement("div")
109
111
  serverDiv.className = "mcp-server-item"
110
112
  const escapedUuid = this.#escapeAttr(server.uuid)
113
+ const sharedBadge = server.owned === false
114
+ ? `<span class="mcp-server-shared-badge" title="Shared by ${this.#escapeAttr(server.shared_by || "another user")} β€” verify before enabling tools you don't trust">
115
+ <i class="bi bi-people-fill"></i> ${this.#escapeHtml(server.shared_by || "shared")}
116
+ </span>`
117
+ : ""
111
118
  serverDiv.innerHTML = `
112
119
  <div class="mcp-server-header" data-action="click->tool-selector#toggleServer" data-server-uuid="${escapedUuid}">
113
120
  <i class="bi bi-chevron-right server-toggle-icon"></i>
114
121
  <i class="bi bi-server"></i>
115
122
  <span class="mcp-server-name">${this.#escapeHtml(server.name)}</span>
123
+ ${sharedBadge}
116
124
  ${server.tools && server.tools.length > 0 ? `<span class="tool-available-count">${server.tools.filter((t) => t.active).length} tools</span>` : ""}
117
125
  </div>
118
126
  <div class="mcp-server-tools" data-server-tools="${escapedUuid}" style="display: none;" data-loaded="${server.tools && server.tools.length > 0 ? "true" : "false"}">
@@ -6,41 +6,22 @@ class Chat < ApplicationRecord
6
6
 
7
7
  before_create :set_uuid
8
8
 
9
- # Find existing chat from session or create new one
10
- class << self
11
- def find_or_switch_for_session(session, current_user)
12
- chat = find_by_session_chat_id(session, current_user)
13
- return chat if chat.present?
14
-
15
- chat = create!(user: current_user)
16
- session[:chat_id] = chat.id
17
- chat
18
- end
19
-
20
- private
21
-
22
- def find_by_session_chat_id(session, current_user)
23
- return nil unless session[:chat_id].present?
24
-
25
- if current_user.present?
26
- includes(:messages).find_by(id: session[:chat_id], user_id: current_user.id)
27
- else
28
- includes(:messages).find_by(id: session[:chat_id], user_id: nil)
29
- end
30
- end
31
- end
32
-
33
9
  # Add a user message to the chat
34
- def add_user_message(message, llm_uuid, model, branch_from_execution_id = nil)
10
+ def add_user_message(message, llm_uuid, model, branch_from_execution_id = nil, llm_platform: nil, image: nil)
35
11
  previous_id = if branch_from_execution_id.present?
36
12
  PromptNavigator::PromptExecution.find_by(execution_id: branch_from_execution_id)&.id
37
13
  else
38
14
  messages.where(role: "user").order(:created_at).last&.prompt_navigator_prompt_execution_id
39
15
  end
16
+ # Prepend the attached image as a data-URI markdown image so the saved
17
+ # prompt renders the image on reload, and so the streaming controller
18
+ # (reached over a GET EventSource) can recover the image from pe.prompt.
19
+ prompt_with_image = image.present? ? "![](data:#{image[:mime]};base64,#{image[:data_b64]})\n\n#{message}" : message
40
20
  prompt_execution = PromptNavigator::PromptExecution.create!(
41
- prompt: message,
21
+ prompt: prompt_with_image,
42
22
  llm_uuid: llm_uuid,
43
23
  model: model,
24
+ llm_platform: llm_platform,
44
25
  configuration: "",
45
26
  previous_id: previous_id
46
27
  )
@@ -72,17 +53,41 @@ class Chat < ApplicationRecord
72
53
  # Returns the assembled content (with markdown "Tool calls" section appended
73
54
  # if tools fired). Caller is responsible for persistence.
74
55
  def stream_assistant_response(prompt_execution, jwt_token, tool_ids: [], generation_settings: {}, &block)
75
- summarized_context, prompt = build_streaming_context(prompt_execution, jwt_token, with_tools: tool_ids.any?)
76
- LlmMetaClient::ServerQuery.new.stream(
77
- jwt_token,
78
- prompt_execution.llm_uuid,
79
- prompt_execution.model,
80
- summarized_context,
81
- prompt,
82
- tool_ids: tool_ids,
83
- generation_settings: generation_settings,
84
- &block
85
- )
56
+ last_msg = ordered_messages.last
57
+ pe = last_msg.prompt_navigator_prompt_execution
58
+ # Separate any attached image (data-URI markdown at the head of the
59
+ # prompt) from the text. The image flows as a structured field; the text
60
+ # goes through the usual prompt path.
61
+ text_prompt, attached_image = extract_attached_image(pe.prompt)
62
+ prompt = { role: last_msg.role, prompt: text_prompt }
63
+
64
+ if image_model?(prompt_execution.model)
65
+ image_context = pe.build_context(limit: Rails.configuration.summarize_conversation_count)
66
+ LlmMetaClient::ServerQuery.new.stream(
67
+ jwt_token,
68
+ prompt_execution.llm_uuid,
69
+ prompt_execution.model,
70
+ "",
71
+ prompt,
72
+ generation_settings: generation_settings,
73
+ image_context: image_context,
74
+ image: attached_image,
75
+ &block
76
+ )
77
+ else
78
+ summarized_context, prompt = build_streaming_context(prompt_execution, jwt_token, with_tools: tool_ids.any?)
79
+ LlmMetaClient::ServerQuery.new.stream(
80
+ jwt_token,
81
+ prompt_execution.llm_uuid,
82
+ prompt_execution.model,
83
+ summarized_context,
84
+ prompt,
85
+ tool_ids: tool_ids,
86
+ generation_settings: generation_settings,
87
+ image: attached_image,
88
+ &block
89
+ )
90
+ end
86
91
  end
87
92
 
88
93
  # Persist the streamed assistant response. Skips persistence if content is blank.
@@ -90,7 +95,7 @@ class Chat < ApplicationRecord
90
95
  return nil if content.blank?
91
96
 
92
97
  prompt_execution.update!(
93
- llm_platform: resolve_llm_type(prompt_execution.llm_uuid, jwt_token),
98
+ llm_platform: prompt_execution.llm_platform.presence || resolve_llm_type(prompt_execution.llm_uuid, jwt_token),
94
99
  response: content
95
100
  )
96
101
  messages.create!(
@@ -127,16 +132,40 @@ class Chat < ApplicationRecord
127
132
 
128
133
  # Summarize the user's prompt into a short title via LLM (required by ChatManager::TitleGeneratable)
129
134
  def summarize_for_title(prompt_text, jwt_token)
135
+ # Strip any attached data-URI image from the prompt before titling; the
136
+ # summarizer shouldn't see the image markdown (it leads to titles like
137
+ # "Undefined Image" derived from the empty alt text).
138
+ text_only, _image = extract_attached_image(prompt_text)
139
+ return nil if text_only.blank?
140
+
130
141
  latest_pe = ordered_by_descending_prompt_executions.first
131
142
  return nil unless latest_pe&.llm_uuid && latest_pe&.model
132
143
 
133
- LlmMetaClient::ServerQuery.new.call(
144
+ raw = LlmMetaClient::ServerQuery.new.call(
134
145
  jwt_token,
135
146
  latest_pe.llm_uuid,
136
147
  latest_pe.model,
137
148
  "No context available.",
138
- { role: "user", prompt: "Please summarize the following text into a short title (max 50 characters). Respond with only the title, nothing else: #{prompt_text}" }
149
+ { role: "user", prompt: "Please summarize the following text into a short title (max 50 characters). Respond with only the title, nothing else: #{text_only}" }
139
150
  )
151
+ strip_title_markdown(raw)
152
+ end
153
+
154
+ # LLMs frequently wrap titles in markdown emphasis (**bold**, *italic*),
155
+ # backtick-code, leading "# heading" marks, or surrounding quotes β€”
156
+ # sometimes despite explicit instructions to return plain text. Strip
157
+ # those artifacts so the chat sidebar shows a clean title.
158
+ def strip_title_markdown(text)
159
+ text.to_s
160
+ .gsub(/\A\s*#+\s*/, "") # leading "# "
161
+ .gsub(/`([^`]+)`/, '\1') # `inline code`
162
+ .gsub(/\*\*\*([^\*]+)\*\*\*/, '\1') # ***triple***
163
+ .gsub(/\*\*([^\*]+)\*\*/, '\1') # **bold**
164
+ .gsub(/\*([^\*]+)\*/, '\1') # *italic*
165
+ .gsub(/__([^_]+)__/, '\1') # __bold__
166
+ .gsub(/_([^_]+)_/, '\1') # _italic_
167
+ .gsub(/\A["'β€œβ€β€˜β€™γ€Œγ€Ž]+|["'β€œβ€β€˜β€™γ€γ€]+\z/, "") # wrapping quotes
168
+ .strip
140
169
  end
141
170
 
142
171
  # Set a new UUID
@@ -167,16 +196,35 @@ class Chat < ApplicationRecord
167
196
  last_msg = ordered_messages.last
168
197
  pe = last_msg.prompt_navigator_prompt_execution
169
198
  prompt = { role: last_msg.role, prompt: pe.prompt }
170
- context = pe.build_context(limit: Rails.configuration.summarize_conversation_count)
199
+
200
+ # Image-generation models don't take prior context. Summarizing through
201
+ # an image model would just generate an image as the "summary".
202
+ if image_model?(prompt_execution.model)
203
+ return [ "No context available.", prompt ]
204
+ end
205
+
206
+ verbatim_count = Rails.configuration.summarize_conversation_count
207
+ context = pe.build_context(limit: verbatim_count * 4)
171
208
 
172
209
  summarized_context =
173
210
  if context.empty?
174
211
  "No context available."
212
+ elsif context.size <= verbatim_count
213
+ # Within budget: replay recent turns verbatim, no summarization call.
214
+ format_transcript(context)
175
215
  else
176
- LlmMetaClient::ServerQuery.new.call(
177
- jwt_token, prompt_execution.llm_uuid, prompt_execution.model,
178
- context, "Please summarize the context"
216
+ # Overflow: summarize the older slice, keep recent turns verbatim.
217
+ # Summarization runs on a cheap fixed model, not the user's selected
218
+ # one. Falls back to the user's model if it isn't available.
219
+ older = context[0...-verbatim_count]
220
+ recent = context.last(verbatim_count)
221
+ sum_uuid, sum_model =
222
+ summarization_target(llm_options) || [ prompt_execution.llm_uuid, prompt_execution.model ]
223
+ summary = LlmMetaClient::ServerQuery.new.call(
224
+ jwt_token, sum_uuid, sum_model,
225
+ older, "Please summarize the context"
179
226
  )
227
+ "Summary of earlier conversation: #{summary}\n\nRecent conversation:\n#{format_transcript(recent)}"
180
228
  end
181
229
  summarized_context += "Additional prompt: Responses from the assistant must consist solely of the response body."
182
230
  if with_tools
@@ -185,4 +233,42 @@ class Chat < ApplicationRecord
185
233
 
186
234
  [ summarized_context, prompt ]
187
235
  end
236
+
237
+ def image_model?(model_meta_id)
238
+ model_meta_id.to_s.include?("image")
239
+ end
240
+
241
+ def format_transcript(turns)
242
+ turns.map { |t| "User: #{t[:prompt]}\nAssistant: #{t[:response]}" }.join("\n\n")
243
+ end
244
+
245
+ # Pull a single leading `![](data:mime;base64,DATA)` image out of the prompt
246
+ # text. Returns [text_without_image, {mime:, data_b64:}|nil]. v1 supports a
247
+ # single image per turn.
248
+ ATTACHED_IMAGE_HEAD = /\A!\[[^\]]*\]\(data:([^;]+);base64,([^\)]+)\)\s*\n*/m
249
+ def extract_attached_image(prompt_text)
250
+ m = prompt_text.to_s.match(ATTACHED_IMAGE_HEAD)
251
+ return [ prompt_text.to_s, nil ] unless m
252
+ stripped = prompt_text.sub(ATTACHED_IMAGE_HEAD, "")
253
+ [ stripped, { mime: m[1], data_b64: m[2] } ]
254
+ end
255
+
256
+ # Cheap model used to condense overflow context. Configured via
257
+ # Rails.configuration.summarization_model (env LLM_SUMMARIZATION_MODEL
258
+ # or credentials[:llm_service][:summarization_model]). If the configured
259
+ # meta_id isn't in the ollama family's catalog at request time, the
260
+ # caller falls back to the user's selected model.
261
+ def summarization_target(llm_options)
262
+ ollama = llm_options.find { |o| o[:llm_type] == "ollama" }
263
+ return nil unless ollama
264
+
265
+ target = Rails.configuration.summarization_model
266
+ return nil unless target.present?
267
+
268
+ models = ollama[:available_models] || []
269
+ available = models.any? { |m| (m["value"] || m[:value]) == target }
270
+ return nil unless available
271
+
272
+ [ ollama[:uuid], target ]
273
+ end
188
274
  end
@@ -3,6 +3,8 @@
3
3
  ->(id) { chat_path(id) },
4
4
  active_uuid: chat&.uuid,
5
5
  download_csv_path: ->(id) { download_csv_chat_path(id) },
6
- download_all_csv_path: download_all_csv_chats_path
6
+ delete_path: ->(id) { chat_path(id) },
7
+ batch_delete_path: batch_destroy_chats_path,
8
+ batch_download_csv_path: download_selected_csv_chats_path
7
9
  ) %>
8
10
  </div>
@@ -8,7 +8,9 @@
8
8
  </div>
9
9
  <div class="message-content">
10
10
  <%% if message.role == 'user' %>
11
- <%%= simple_format(message.prompt_navigator_prompt_execution&.prompt) %>
11
+ <%% img, text = split_attached_image_html(message.prompt_navigator_prompt_execution&.prompt) %>
12
+ <%%= img if img %>
13
+ <%%= simple_format(text) %>
12
14
  <%% else %>
13
15
  <%%= simple_format(message.prompt_navigator_prompt_execution&.response) %>
14
16
  <%% end %>
@@ -7,6 +7,12 @@
7
7
  <div class="message assistant streaming"
8
8
  data-controller="message-stream"
9
9
  data-message-stream-url-value="<%%= stream_url %>">
10
+ <button type="button" class="stream-cancel-button"
11
+ title="Cancel"
12
+ data-message-stream-target="cancelButton"
13
+ data-action="click->message-stream#cancel">
14
+ <i class="bi bi-x-circle"></i>
15
+ </button>
10
16
  <div class="message-role">πŸ€– streaming…</div>
11
17
  <div class="message-content" data-message-stream-target="content"></div>
12
18
  </div>