llm_meta_client 1.2.0 → 1.3.0

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA256:
-   metadata.gz: 5529e0613f2103802abfbf448ef5d030f63a7788e8fde7440d732fa94ced7378
-   data.tar.gz: 38223a0a3ff727e9a649fc38db922e5f89aaaa858118464ef9d410a727fff5f1
+   metadata.gz: 2fcc6377f3293f8ecd13b81cd79ea63891c18f55dfa05ee225a151f7e1fa5b84
+   data.tar.gz: 35c4cba209aed5989b43606715205a7e2b85c669fa7dd71d65a33f4dd47a293a
  SHA512:
-   metadata.gz: 2d86c22b05ff9991ff7087a370de97fc90adaf766f2a13745511d2ce7beb129828e169cbeb601b9d2996836f62fb79ca398a66a2e1fbd09a20cf78c148d5fb8a
-   data.tar.gz: 55cc853db63cca50ace6693e4b4a124ee2923371fd6709b2ef1d92e3fdcb4bc62ad433533c2e832dfff132d6ab91bf608847d583d0fdc1104beb38c54597db8d
+   metadata.gz: c959d77e7d3b8c9f5070bf2f63a74e83ba1082391694788e094c09629e76883dcf0142336af7742dd08fa46c0d36ec20ad31ad85b558ea4199cb8b7e3c4345fc
+   data.tar.gz: 651d88ddb211fd11234daf2ecee22e368d8886d27e43b94c1786d4d3980cedfb078a562ee0cd0ad06a48140c7fb07cae779326237e604b21a819d4ceb663d815
data/CHANGELOG.md CHANGED
@@ -5,6 +5,23 @@ All notable changes to this project will be documented in this file.
  The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
  and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+ ## [1.3.0] - 2026-05-10
+
+ ### Added
+
+ - Tool-call streaming end-to-end. `ServerQuery#stream` now accepts `tool_ids:` and yields a `tool_calls` event when the LLM decides to invoke MCP tools. Turn 1 (tool selection) runs synchronously; turn 2 (the follow-up after tool execution) is streamed.
+ - Scaffold renders a separate "🛠 Tool calls" bubble during streaming via the new `_tool_call_message.html.erb` partial. The Stimulus controller inserts it before the streaming bubble when `event: tool_calls` arrives, and removes it once the assistant message is saved (the saved bubble's combined markdown contains the tool-call section).
+ - `Chat#stream_assistant_response` accepts `tool_ids:` and threads them through. Persistence is unchanged — the saved `Message.response` includes a markdown "Tool calls" section appended to the response text, matching the existing synchronous shape.
+ - When `tool_ids` is non-empty, the system prompt is augmented with an instruction to explain tool errors rather than fail silently. Models that ignore the instruction are caught by a server-side fallback (see below).
+
+ ### Changed
+
+ - Streaming error messages now parse `error` + `message` from the response body so users see context (e.g. "Rate limit exceeded — check your provider plan…") instead of a bare HTTP code. Mid-stream `event: error` payloads with codes like `rate_limit` get the same friendlier treatment.
+
+ ### Notes
+
+ - Requires `llm_meta_server` with the matching tool-streaming additions: `LlmRbFacade.stream!` accepting `tools:` + `on_tool_calls:`, and an `Api::ChatStreamsController` that emits the `tool_calls` SSE event. Server-side fixes that ship alongside this release: an Anthropic tool-only-response rehydrator (Claude tool-only completions weren't surfacing through `Session#functions`), and a sink injection that emits MCP `isError: true` payloads as text deltas before turn 2 (Gemini sometimes returns nothing after a tool error and would otherwise leave the bubble blank).
+
  ## [1.2.0] - 2026-05-10
 
  ### Added
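
To make the new streaming contract concrete, here is a minimal consumer sketch of `ServerQuery#stream` with tools enabled, based purely on the event shapes and return value documented above. The token, key UUID, model id, and tool UUID are placeholders, not real values.

```ruby
require "llm_meta_client"

# Minimal sketch of the 1.3.0 streaming contract (placeholder credentials).
id_token     = "<jwt-id-token>"   # placeholder
api_key_uuid = "<api-key-uuid>"   # placeholder

final = LlmMetaClient::ServerQuery.new.stream(
  id_token,
  api_key_uuid,
  "<model-id>",                    # placeholder model id
  "You are a terse assistant.",    # summarized context
  "What's the weather in Tokyo?",  # user prompt
  tool_ids: ["<mcp-tool-uuid>"]    # placeholder MCP tool UUID
) do |event|
  case event[:event]
  when "tool_calls"
    # Fires once, after the synchronous turn 1 selects tools.
    event[:data]["tool_calls"].each { |tc| puts "-> #{tc["name"]}" }
  when "message"
    print event[:data]["delta"]    # turn 2 streams as text deltas
  end
end

# If tools fired, `final` already carries the appended markdown
# "Tool calls" section, matching the synchronous #call shape.
```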
@@ -31,6 +31,7 @@ module LlmMetaClient
  template "app/views/chats/update.turbo_stream.erb"
  template "app/views/chats/_message.html.erb"
  template "app/views/chats/_streaming_message.html.erb"
+ template "app/views/chats/_tool_call_message.html.erb"
  template "app/views/chats/_chat_sidebar.html.erb"
  template "app/views/chats/_messages_list.html.erb"
  template "app/views/shared/_family_field.html.erb"
@@ -17,9 +17,18 @@ class ChatStreamsController < ApplicationController
 
    jwt_token = current_user.id_token if user_signed_in?
    generation_settings = parse_generation_settings(params[:generation_settings_json])
+   tool_ids = Array(params[:tool_ids]).reject(&:blank?)
 
-   assembled = chat.stream_assistant_response(prompt_execution, jwt_token, generation_settings: generation_settings) do |event|
-     forward(event)
+   assembled = chat.stream_assistant_response(prompt_execution, jwt_token, tool_ids: tool_ids, generation_settings: generation_settings) do |event|
+     if event[:event] == "tool_calls"
+       tool_calls = event[:data]["tool_calls"] || []
+       forward(event: "tool_calls", data: {
+         tool_calls: tool_calls,
+         html: view_context.render(partial: "chats/tool_call_message", locals: { tool_calls: tool_calls })
+       })
+     else
+       forward(event)
+     end
    end
 
    if assembled.present?
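
The `forward` helper itself is not part of this diff. For orientation only, a plausible sketch, assuming an `ActionController::Live` controller writing standard SSE frames; the method body and framing here are assumptions, not the gem's code:

```ruby
# Hypothetical `forward` helper (not shown in this diff). Assuming it
# writes standard SSE frames, the tool_calls branch above would reach
# the browser as:
#
#   event: tool_calls
#   data: {"tool_calls":[...],"html":"<div class=\"message assistant tool-call\">...</div>"}
#
def forward(event)
  response.stream.write("event: #{event[:event]}\n")
  response.stream.write("data: #{event[:data].to_json}\n\n")
end
```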
@@ -97,6 +97,7 @@ class ChatsController < ApplicationController
    # The streaming bubble is rendered by create.turbo_stream.erb and opens
    # the EventSource on connect; persistence + title gen happen at stream close.
    @generation_settings_json = params[:generation_settings_json]
+   @tool_ids = Array(params[:tool_ids]).reject(&:blank?)
  end
 
  # Return turbo stream to render both messages
@@ -189,6 +190,7 @@ class ChatsController < ApplicationController
    # The assistant response is streamed by ChatStreamsController (SSE).
    # See create action for details.
    @generation_settings_json = params[:generation_settings_json]
+   @tool_ids = Array(params[:tool_ids]).reject(&:blank?)
  end
 
  # Return turbo stream to render both messages
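
Both actions normalize `params[:tool_ids]` the same way. A quick illustration of what `Array(...).reject(&:blank?)` does with the shapes a Rails param can take (`blank?` comes from ActiveSupport):

```ruby
Array(nil).reject(&:blank?)            # => []            (param absent)
Array("uuid-1").reject(&:blank?)       # => ["uuid-1"]    (single value)
Array(["uuid-1", ""]).reject(&:blank?) # => ["uuid-1"]    (empty string dropped)
```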
@@ -14,6 +14,7 @@ export default class extends Controller {
    this.source.addEventListener("done", () => this.#onDone())
    this.source.addEventListener("title", (e) => this.#onTitle(e))
    this.source.addEventListener("saved", (e) => this.#onSaved(e))
+   this.source.addEventListener("tool_calls", (e) => this.#onToolCalls(e))
    this.source.addEventListener("error", (e) => this.#onError(e))
  }
 
@@ -43,9 +44,31 @@ export default class extends Controller {
      const data = JSON.parse(event.data)
      this.element.dataset.savedExecutionId = data.execution_id
      if (data.html) this.#swapInRenderedMessage(data.html)
+     // The saved bubble's content already includes any tool calls section in
+     // markdown; remove the transient tool-call bubbles so reload and live look
+     // the same.
+     this.#removeTransientToolCallBubbles()
    } catch {}
  }
 
+ #onToolCalls(event) {
+   try {
+     const data = JSON.parse(event.data)
+     if (!data.html) return
+     const wrapper = document.createElement("template")
+     wrapper.innerHTML = data.html.trim()
+     const bubble = wrapper.content.firstElementChild
+     if (!bubble) return
+     bubble.classList.add("tool-call-streaming")
+     this.element.parentNode.insertBefore(bubble, this.element)
+     this.#scrollToBottom()
+   } catch {}
+ }
+
+ #removeTransientToolCallBubbles() {
+   document.querySelectorAll(".tool-call-streaming").forEach((el) => el.remove())
+ }
+
  // Swap the streaming bubble's role + content with the host-rendered _message
  // partial output so any markdown / syntax highlighting / partial customizations
  // applied on reload also apply right after the stream finishes. We don't
@@ -69,15 +69,17 @@ class Chat < ApplicationRecord
  end
 
  # Stream the assistant response from the LLM. Yields each parsed SSE event.
- # Returns the assembled content. Caller is responsible for persistence.
- def stream_assistant_response(prompt_execution, jwt_token, generation_settings: {}, &block)
-   summarized_context, prompt = build_streaming_context(prompt_execution, jwt_token)
+ # Returns the assembled content (with markdown "Tool calls" section appended
+ # if tools fired). Caller is responsible for persistence.
+ def stream_assistant_response(prompt_execution, jwt_token, tool_ids: [], generation_settings: {}, &block)
+   summarized_context, prompt = build_streaming_context(prompt_execution, jwt_token, with_tools: tool_ids.any?)
    LlmMetaClient::ServerQuery.new.stream(
      jwt_token,
      prompt_execution.llm_uuid,
      prompt_execution.model,
      summarized_context,
      prompt,
+     tool_ids: tool_ids,
      generation_settings: generation_settings,
      &block
    )
@@ -144,7 +146,7 @@ class Chat < ApplicationRecord
 
  # Send messages to LLM and get response
  def send_to_llm(prompt_execution, jwt_token, tool_ids: [], generation_settings: {})
-   summarized_context, prompt = build_streaming_context(prompt_execution, jwt_token)
+   summarized_context, prompt = build_streaming_context(prompt_execution, jwt_token, with_tools: tool_ids.any?)
    LlmMetaClient::ServerQuery.new.call(
      jwt_token,
      prompt_execution.llm_uuid,
@@ -158,7 +160,7 @@ class Chat < ApplicationRecord
 
  # Build the (summarized_context, prompt) tuple for an LLM call.
  # Shared by both the synchronous and streaming paths.
- def build_streaming_context(prompt_execution, jwt_token)
+ def build_streaming_context(prompt_execution, jwt_token, with_tools: false)
    llm_options = LlmMetaClient::ServerResource.available_llm_options(jwt_token)
    raise LlmMetaClient::Exceptions::OllamaUnavailableError, "No LLM available" if llm_options.empty?
 
@@ -177,6 +179,9 @@ class Chat < ApplicationRecord
    )
  end
  summarized_context += "Additional prompt: Responses from the assistant must consist solely of the response body."
+ if with_tools
+   summarized_context += " If a tool call returns an error, do not give up silently — explain the error and what likely caused it (e.g. an invalid argument value)."
+ end
 
  [ summarized_context, prompt ]
  end
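
Taken together, a caller-side sketch of the model API: streaming happens in the block, persistence stays with the caller. The `messages.create!` call and its attribute name are assumptions, inferred from the changelog's mention of `Message.response`; `forward` is the hypothetical SSE helper sketched earlier.

```ruby
# Caller-side sketch (persistence is the caller's job). The Message
# association and attribute names are assumptions based on the changelog.
assembled = chat.stream_assistant_response(
  prompt_execution,
  jwt_token,
  tool_ids: tool_ids,
  generation_settings: generation_settings
) do |event|
  forward(event) # relay each SSE event to the browser as it arrives
end

# `assembled` already includes the markdown "Tool calls" section if tools fired.
chat.messages.create!(response: assembled) if assembled.present?
```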
@@ -1,4 +1,9 @@
- <%% stream_url = chat_stream_path(chat_id: chat.uuid, execution_id: prompt_execution.execution_id, generation_settings_json: @generation_settings_json.presence) %>
+ <%% stream_url = chat_stream_path(
+      chat_id: chat.uuid,
+      execution_id: prompt_execution.execution_id,
+      generation_settings_json: @generation_settings_json.presence,
+      tool_ids: (@tool_ids.presence || nil)
+    ) %>
  <div class="message assistant streaming"
       data-controller="message-stream"
       data-message-stream-url-value="<%%= stream_url %>">
@@ -0,0 +1,22 @@
+ <div class="message assistant tool-call">
+   <div class="message-role">🛠 Tool calls</div>
+   <div class="message-content">
+     <ul>
+       <%% tool_calls.each do |tc| %>
+         <%% name = tc["name"] || tc[:name] || "(unknown)" %>
+         <%% args = tc["arguments"] || tc[:arguments] %>
+         <%% args_str = case args
+                        when Hash, Array then args.to_json
+                        when nil, "" then nil
+                        else args.to_s
+                        end %>
+         <li>
+           <code><%%= name %></code>
+           <%% if args_str %>
+             — <code><%%= args_str %></code>
+           <%% end %>
+         </li>
+       <%% end %>
+     </ul>
+   </div>
+ </div>
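
For reference, the input shapes this partial tolerates, with made-up tool names: string and symbol keys both work, and `arguments` may be a Hash/Array (rendered as JSON), a plain string, or absent.

```ruby
# Illustrative locals for the partial (hypothetical tool names/arguments).
tool_calls = [
  { "name" => "get_weather", "arguments" => { "city" => "Tokyo" } }, # Hash -> JSON
  { name: :list_files, arguments: "path=/tmp" },                     # symbol keys, string args
  { "name" => "ping" }                                               # no arguments: name only
]
# render partial: "chats/tool_call_message", locals: { tool_calls: tool_calls }
#
# Rendered list items:
#   get_weather — {"city":"Tokyo"}
#   list_files — path=/tmp
#   ping
```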
@@ -5,23 +5,31 @@ require "json"
  module LlmMetaClient
    class ServerQuery
      # Stream LLM responses incrementally. Yields each content delta event
-     # ({ event: "message", data: { "delta" => "..." } }) to the caller's block.
-     # Upstream "done" markers are absorbed (end-of-stream is signaled by the
-     # block returning); upstream "error" events raise ServerError.
-     # Returns the assembled content string. Tool calls are not supported here.
-     def stream(id_token, api_key_uuid, model_id, context, user_content, generation_settings: {})
+     # ({ event: "message", data: { "delta" => "..." } }) and any tool_calls
+     # event ({ event: "tool_calls", data: { "tool_calls" => [...] } }) to the
+     # caller's block. Upstream "done" markers are absorbed (end-of-stream is
+     # signaled by the block returning); upstream "error" events raise ServerError.
+     # Returns the final assistant content. If tool calls fired, the returned
+     # string mirrors the synchronous #call format (response + markdown
+     # "Tool calls" section appended) so persistence stays consistent.
+     def stream(id_token, api_key_uuid, model_id, context, user_content, tool_ids: [], generation_settings: {})
        context_and_user_content = "Context:#{context}, User Prompt: #{user_content}"
        debug_log "Streaming request to LLM: \n===>\n#{context_and_user_content}\n===>"
 
        body = { prompt: context_and_user_content }
+       body[:tool_ids] = tool_ids if tool_ids.present?
        body[:generation_settings] = generation_settings if generation_settings.present?
 
        assembled = +""
+       collected_tool_calls = []
        request_stream(api_key_uuid, id_token, model_id, body) do |event|
          case event[:event]
          when "message"
            assembled << event[:data]["delta"].to_s
            yield event if block_given?
+         when "tool_calls"
+           collected_tool_calls = event[:data]["tool_calls"] || []
+           yield event if block_given?
          when "done"
            # End-of-stream marker from upstream; no-op here.
          when "error"
@@ -31,7 +39,7 @@ module LlmMetaClient
          end
        end
 
-       assembled
+       collected_tool_calls.any? ? combine_with_tool_calls(assembled, collected_tool_calls) : assembled
      end
 
      def call(id_token, api_key_uuid, model_id, context, user_content, tool_ids: [], generation_settings: {})
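
`combine_with_tool_calls` is referenced above but its body falls outside this hunk. A plausible sketch, assuming it appends the markdown "Tool calls" section the changelog describes; the exact layout is an assumption, not the gem's actual formatting. (The file already requires "json", so `to_json` is available.)

```ruby
# Hypothetical body for combine_with_tool_calls (not shown in this diff).
# Assumption: appends a markdown "Tool calls" section so the streamed
# result mirrors the synchronous #call output; layout is illustrative.
def combine_with_tool_calls(assembled, tool_calls)
  lines = tool_calls.map do |tc|
    args = tc["arguments"]
    args ? "- `#{tc["name"]}` `#{args.to_json}`" : "- `#{tc["name"]}`"
  end
  "#{assembled}\n\n### Tool calls\n\n#{lines.join("\n")}"
end
```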
@@ -1,3 +1,3 @@
  module LlmMetaClient
-   VERSION = "1.2.0"
+   VERSION = "1.3.0"
  end
metadata CHANGED
@@ -1,7 +1,7 @@
  --- !ruby/object:Gem::Specification
  name: llm_meta_client
  version: !ruby/object:Gem::Version
-   version: 1.2.0
+   version: 1.3.0
  platform: ruby
  authors:
  - dhq_boiler
@@ -125,6 +125,7 @@ files:
  - lib/generators/llm_meta_client/scaffold/templates/app/views/chats/_message.html.erb
  - lib/generators/llm_meta_client/scaffold/templates/app/views/chats/_messages_list.html.erb
  - lib/generators/llm_meta_client/scaffold/templates/app/views/chats/_streaming_message.html.erb
+ - lib/generators/llm_meta_client/scaffold/templates/app/views/chats/_tool_call_message.html.erb
  - lib/generators/llm_meta_client/scaffold/templates/app/views/chats/create.turbo_stream.erb
  - lib/generators/llm_meta_client/scaffold/templates/app/views/chats/edit.html.erb
  - lib/generators/llm_meta_client/scaffold/templates/app/views/chats/new.html.erb