llm_meta_client 1.2.0 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -0
- data/lib/generators/llm_meta_client/scaffold/scaffold_generator.rb +1 -0
- data/lib/generators/llm_meta_client/scaffold/templates/app/controllers/chat_streams_controller.rb +11 -2
- data/lib/generators/llm_meta_client/scaffold/templates/app/controllers/chats_controller.rb +2 -0
- data/lib/generators/llm_meta_client/scaffold/templates/app/javascript/controllers/message_stream_controller.js +23 -0
- data/lib/generators/llm_meta_client/scaffold/templates/app/models/chat.rb +10 -5
- data/lib/generators/llm_meta_client/scaffold/templates/app/views/chats/_streaming_message.html.erb +6 -1
- data/lib/generators/llm_meta_client/scaffold/templates/app/views/chats/_tool_call_message.html.erb +22 -0
- data/lib/llm_meta_client/server_query.rb +14 -6
- data/lib/llm_meta_client/version.rb +1 -1
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: 2fcc6377f3293f8ecd13b81cd79ea63891c18f55dfa05ee225a151f7e1fa5b84
+  data.tar.gz: 35c4cba209aed5989b43606715205a7e2b85c669fa7dd71d65a33f4dd47a293a
 SHA512:
-  metadata.gz:
-  data.tar.gz:
+  metadata.gz: c959d77e7d3b8c9f5070bf2f63a74e83ba1082391694788e094c09629e76883dcf0142336af7742dd08fa46c0d36ec20ad31ad85b558ea4199cb8b7e3c4345fc
+  data.tar.gz: 651d88ddb211fd11234daf2ecee22e368d8886d27e43b94c1786d4d3980cedfb078a562ee0cd0ad06a48140c7fb07cae779326237e604b21a819d4ceb663d815
data/CHANGELOG.md
CHANGED
@@ -5,6 +5,23 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+## [1.3.0] - 2026-05-10
+
+### Added
+
+- Tool-call streaming end-to-end. `ServerQuery#stream` now accepts `tool_ids:` and yields a `tool_calls` event when the LLM decides to invoke MCP tools. Turn 1 (tool selection) runs synchronously; turn 2 (the follow-up after tool execution) is streamed.
+- Scaffold renders a separate "🔧 Tool calls" bubble during streaming via the new `_tool_call_message.html.erb` partial. The Stimulus controller inserts it before the streaming bubble when `event: tool_calls` arrives, and removes it once the assistant message is saved (the saved bubble's combined markdown contains the tool-call section).
+- `Chat#stream_assistant_response` accepts `tool_ids:` and threads them through. Persistence is unchanged - the saved `Message.response` includes a markdown "Tool calls" section appended to the response text, matching the existing synchronous shape.
+- When `tool_ids` is non-empty, the system prompt is augmented with an instruction to explain tool errors rather than fail silently. Models that ignore the instruction are caught by a server-side fallback (see below).
+
+### Changed
+
+- Streaming error messages now parse `error` + `message` from the response body so users see context (e.g. "Rate limit exceeded - check your provider plan…") instead of a bare HTTP code. Mid-stream `event: error` payloads with codes like `rate_limit` get the same friendlier treatment.
+
+### Notes
+
+- Requires `llm_meta_server` with the matching tool-streaming additions: `LlmRbFacade.stream!` accepting `tools:` + `on_tool_calls:` and an `Api::ChatStreamsController` that emits the `tool_calls` SSE event. Server-side fixes that ship alongside this release: an Anthropic-tool-only-response rehydrator (Claude tool-only completions weren't surfacing through `Session#functions`), and a sink injection that emits MCP `isError: true` payloads as text deltas before turn 2 (Gemini sometimes returns nothing after a tool error and would otherwise leave the bubble blank).
+
 ## [1.2.0] - 2026-05-10
 
 ### Added
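For orientation, a minimal consumer of the 1.3.0 streaming API described above. This is a sketch, not scaffold code: the JWT, API-key UUID, model id, and tool UUIDs are placeholder values, and the `require` path assumes the gem's default entry point.

```ruby
require "llm_meta_client"

# Placeholder credentials/IDs - in the scaffold these come from the signed-in
# user and the PromptExecution record.
jwt_token    = ENV["LLM_META_JWT"]
api_key_uuid = ENV["LLM_META_API_KEY_UUID"]
model        = "some-model-id"
tool_ids     = ["example-mcp-tool-uuid"] # hypothetical MCP tool UUIDs

assembled = LlmMetaClient::ServerQuery.new.stream(
  jwt_token, api_key_uuid, model,
  "You are a concise assistant.",      # context
  "What's the weather in Tokyo?",      # user prompt
  tool_ids: tool_ids
) do |event|
  case event[:event]
  when "tool_calls"
    # Turn 1 finished synchronously; the LLM chose these MCP tools.
    puts "tool calls: #{event[:data]['tool_calls'].inspect}"
  when "message"
    # Turn 2 (after tool execution) arrives as streamed deltas.
    print event[:data]["delta"]
  end
end

# When tools fired, `assembled` already ends with the markdown "Tool calls"
# section, matching the synchronous #call format.
puts
puts assembled
```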
data/lib/generators/llm_meta_client/scaffold/scaffold_generator.rb
CHANGED

@@ -31,6 +31,7 @@ module LlmMetaClient
       template "app/views/chats/update.turbo_stream.erb"
       template "app/views/chats/_message.html.erb"
       template "app/views/chats/_streaming_message.html.erb"
+      template "app/views/chats/_tool_call_message.html.erb"
       template "app/views/chats/_chat_sidebar.html.erb"
       template "app/views/chats/_messages_list.html.erb"
       template "app/views/shared/_family_field.html.erb"
data/lib/generators/llm_meta_client/scaffold/templates/app/controllers/chat_streams_controller.rb
CHANGED
@@ -17,9 +17,18 @@ class ChatStreamsController < ApplicationController
 
     jwt_token = current_user.id_token if user_signed_in?
     generation_settings = parse_generation_settings(params[:generation_settings_json])
+    tool_ids = Array(params[:tool_ids]).reject(&:blank?)
 
-    assembled = chat.stream_assistant_response(prompt_execution, jwt_token, generation_settings: generation_settings) do |event|
-      forward(event)
+    assembled = chat.stream_assistant_response(prompt_execution, jwt_token, tool_ids: tool_ids, generation_settings: generation_settings) do |event|
+      if event[:event] == "tool_calls"
+        tool_calls = event[:data]["tool_calls"] || []
+        forward(event: "tool_calls", data: {
+          tool_calls: tool_calls,
+          html: view_context.render(partial: "chats/tool_call_message", locals: { tool_calls: tool_calls })
+        })
+      else
+        forward(event)
+      end
     end
 
     if assembled.present?
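The `forward` helper itself belongs to the host app and is not shown in this diff. As a rough sketch, assuming an ActionController::Live SSE stream, it has to write named frames like the following; those frame names are what the EventSource listeners in message_stream_controller.js (below) hook into.

```ruby
# Hypothetical sketch of an SSE writer compatible with the calls above - not
# the scaffold's actual `forward` implementation. Both call sites pass a hash
# with :event and :data keys.
def forward(evt)
  response.stream.write("event: #{evt[:event]}\n")
  response.stream.write("data: #{evt[:data].to_json}\n\n")
end

# forward(event: "tool_calls", data: { tool_calls: [...], html: "<div>...</div>" })
# would then emit a frame the Stimulus "tool_calls" listener receives:
#
#   event: tool_calls
#   data: {"tool_calls":[...],"html":"<div>...</div>"}
```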
data/lib/generators/llm_meta_client/scaffold/templates/app/controllers/chats_controller.rb
CHANGED

@@ -97,6 +97,7 @@ class ChatsController < ApplicationController
     # The streaming bubble is rendered by create.turbo_stream.erb and opens
     # the EventSource on connect; persistence + title gen happen at stream close.
     @generation_settings_json = params[:generation_settings_json]
+    @tool_ids = Array(params[:tool_ids]).reject(&:blank?)
   end
 
   # Return turbo stream to render both messages

@@ -189,6 +190,7 @@ class ChatsController < ApplicationController
     # The assistant response is streamed by ChatStreamsController (SSE).
     # See create action for details.
     @generation_settings_json = params[:generation_settings_json]
+    @tool_ids = Array(params[:tool_ids]).reject(&:blank?)
   end
 
   # Return turbo stream to render both messages
data/lib/generators/llm_meta_client/scaffold/templates/app/javascript/controllers/message_stream_controller.js
CHANGED

@@ -14,6 +14,7 @@ export default class extends Controller {
     this.source.addEventListener("done", () => this.#onDone())
     this.source.addEventListener("title", (e) => this.#onTitle(e))
     this.source.addEventListener("saved", (e) => this.#onSaved(e))
+    this.source.addEventListener("tool_calls", (e) => this.#onToolCalls(e))
     this.source.addEventListener("error", (e) => this.#onError(e))
   }
 

@@ -43,9 +44,31 @@ export default class extends Controller {
       const data = JSON.parse(event.data)
       this.element.dataset.savedExecutionId = data.execution_id
       if (data.html) this.#swapInRenderedMessage(data.html)
+      // The saved bubble's content already includes any tool calls section in
+      // markdown; remove the transient tool-call bubbles so reload and live look
+      // the same.
+      this.#removeTransientToolCallBubbles()
     } catch {}
   }
 
+  #onToolCalls(event) {
+    try {
+      const data = JSON.parse(event.data)
+      if (!data.html) return
+      const wrapper = document.createElement("template")
+      wrapper.innerHTML = data.html.trim()
+      const bubble = wrapper.content.firstElementChild
+      if (!bubble) return
+      bubble.classList.add("tool-call-streaming")
+      this.element.parentNode.insertBefore(bubble, this.element)
+      this.#scrollToBottom()
+    } catch {}
+  }
+
+  #removeTransientToolCallBubbles() {
+    document.querySelectorAll(".tool-call-streaming").forEach((el) => el.remove())
+  }
+
   // Swap the streaming bubble's role + content with the host-rendered _message
   // partial output so any markdown / syntax highlighting / partial customizations
   // applied on reload also apply right after the stream finishes. We don't
data/lib/generators/llm_meta_client/scaffold/templates/app/models/chat.rb
CHANGED

@@ -69,15 +69,17 @@ class Chat < ApplicationRecord
   end
 
   # Stream the assistant response from the LLM. Yields each parsed SSE event.
-  # Returns the assembled content
-  def stream_assistant_response(prompt_execution, jwt_token, generation_settings: {}, &block)
-    summarized_context, prompt = build_streaming_context(prompt_execution, jwt_token)
+  # Returns the assembled content (with markdown "Tool calls" section appended
+  # if tools fired). Caller is responsible for persistence.
+  def stream_assistant_response(prompt_execution, jwt_token, tool_ids: [], generation_settings: {}, &block)
+    summarized_context, prompt = build_streaming_context(prompt_execution, jwt_token, with_tools: tool_ids.any?)
     LlmMetaClient::ServerQuery.new.stream(
       jwt_token,
       prompt_execution.llm_uuid,
       prompt_execution.model,
       summarized_context,
       prompt,
+      tool_ids: tool_ids,
       generation_settings: generation_settings,
       &block
     )

@@ -144,7 +146,7 @@ class Chat < ApplicationRecord
 
   # Send messages to LLM and get response
   def send_to_llm(prompt_execution, jwt_token, tool_ids: [], generation_settings: {})
-    summarized_context, prompt = build_streaming_context(prompt_execution, jwt_token)
+    summarized_context, prompt = build_streaming_context(prompt_execution, jwt_token, with_tools: tool_ids.any?)
     LlmMetaClient::ServerQuery.new.call(
       jwt_token,
       prompt_execution.llm_uuid,

@@ -158,7 +160,7 @@ class Chat < ApplicationRecord
 
   # Build the (summarized_context, prompt) tuple for an LLM call.
   # Shared by both the synchronous and streaming paths.
-  def build_streaming_context(prompt_execution, jwt_token)
+  def build_streaming_context(prompt_execution, jwt_token, with_tools: false)
     llm_options = LlmMetaClient::ServerResource.available_llm_options(jwt_token)
     raise LlmMetaClient::Exceptions::OllamaUnavailableError, "No LLM available" if llm_options.empty?
 

@@ -177,6 +179,9 @@ class Chat < ApplicationRecord
       )
     end
     summarized_context += "Additional prompt: Responses from the assistant must consist solely of the response body."
+    if with_tools
+      summarized_context += " If a tool call returns an error, do not give up silently - explain the error and what likely caused it (e.g. an invalid argument value)."
+    end
 
     [ summarized_context, prompt ]
   end
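Taken together, the model-level call the controller makes looks roughly like this (console-style sketch: `chat`, `prompt_execution`, the JWT, the tool UUID, and the generation settings are stand-ins for the host app's own records and values).

```ruby
# Sketch only - mirrors how ChatStreamsController drives the new Chat API.
assembled = chat.stream_assistant_response(
  prompt_execution,
  jwt_token,
  tool_ids: ["example-mcp-tool-uuid"],       # hypothetical MCP tool UUIDs
  generation_settings: { "temperature" => 0.3 } # illustrative value
) do |event|
  print event[:data]["delta"] if event[:event] == "message"
end

# Persistence stays with the caller: `assembled` already carries the markdown
# "Tool calls" section when tools fired, so saving it to Message.response keeps
# the streamed and synchronous shapes identical.
```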
data/lib/generators/llm_meta_client/scaffold/templates/app/views/chats/_streaming_message.html.erb
CHANGED
@@ -1,4 +1,9 @@
-<%% stream_url = chat_stream_path(
+<%% stream_url = chat_stream_path(
+  chat_id: chat.uuid,
+  execution_id: prompt_execution.execution_id,
+  generation_settings_json: @generation_settings_json.presence,
+  tool_ids: (@tool_ids.presence || nil)
+) %>
 <div class="message assistant streaming"
      data-controller="message-stream"
      data-message-stream-url-value="<%%= stream_url %>">
data/lib/generators/llm_meta_client/scaffold/templates/app/views/chats/_tool_call_message.html.erb
ADDED
@@ -0,0 +1,22 @@
+<div class="message assistant tool-call">
+  <div class="message-role">🔧 Tool calls</div>
+  <div class="message-content">
+    <ul>
+      <%% tool_calls.each do |tc| %>
+        <%% name = tc["name"] || tc[:name] || "(unknown)" %>
+        <%% args = tc["arguments"] || tc[:arguments] %>
+        <%% args_str = case args
+                       when Hash, Array then args.to_json
+                       when nil, "" then nil
+                       else args.to_s
+                       end %>
+        <li>
+          <code><%%= name %></code>
+          <%% if args_str %>
+            → <code><%%= args_str %></code>
+          <%% end %>
+        </li>
+      <%% end %>
+    </ul>
+  </div>
+</div>
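The partial is tolerant about the shape of each `tool_calls` entry: string or symbol keys, and `arguments` as a Hash/Array, a String, or absent. Illustrative inputs (names and values made up) and how each renders:

```ruby
# Illustrative inputs for the partial above - tool names/arguments are made up.
tool_calls = [
  { "name" => "get_weather", "arguments" => { "city" => "Tokyo" } }, # get_weather → {"city":"Tokyo"}
  { name: "search", arguments: "q=tool streaming" },                 # search → q=tool streaming
  { "name" => "ping" }                                               # just the tool name, no arguments
]
```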
data/lib/llm_meta_client/server_query.rb
CHANGED

@@ -5,23 +5,31 @@ require "json"
 module LlmMetaClient
   class ServerQuery
     # Stream LLM responses incrementally. Yields each content delta event
-    # ({ event: "message", data: { "delta" => "..." } })
-    #
-    # block
-    #
-    def stream(id_token, api_key_uuid, model_id, context, user_content, generation_settings: {})
+    # ({ event: "message", data: { "delta" => "..." } }) and any tool_calls
+    # event ({ event: "tool_calls", data: { "tool_calls" => [...] } }) to the
+    # caller's block. Upstream "done" markers are absorbed (end-of-stream is
+    # signaled by the block returning); upstream "error" events raise ServerError.
+    # Returns the final assistant content. If tool calls fired, the returned
+    # string mirrors the synchronous #call format (response + markdown
+    # "Tool calls" section appended) so persistence stays consistent.
+    def stream(id_token, api_key_uuid, model_id, context, user_content, tool_ids: [], generation_settings: {})
       context_and_user_content = "Context:#{context}, User Prompt: #{user_content}"
       debug_log "Streaming request to LLM: \n===>\n#{context_and_user_content}\n===>"
 
       body = { prompt: context_and_user_content }
+      body[:tool_ids] = tool_ids if tool_ids.present?
       body[:generation_settings] = generation_settings if generation_settings.present?
 
       assembled = +""
+      collected_tool_calls = []
       request_stream(api_key_uuid, id_token, model_id, body) do |event|
         case event[:event]
         when "message"
           assembled << event[:data]["delta"].to_s
           yield event if block_given?
+        when "tool_calls"
+          collected_tool_calls = event[:data]["tool_calls"] || []
+          yield event if block_given?
         when "done"
           # End-of-stream marker from upstream; no-op here.
         when "error"

@@ -31,7 +39,7 @@ module LlmMetaClient
         end
       end
 
-      assembled
+      collected_tool_calls.any? ? combine_with_tool_calls(assembled, collected_tool_calls) : assembled
     end
 
     def call(id_token, api_key_uuid, model_id, context, user_content, tool_ids: [], generation_settings: {})
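Per the Changed entry in the changelog, stream failures now surface the upstream `error` + `message` detail instead of a bare HTTP code. A hedged sketch of handling that on the caller side; the doc comment above names the raised error only as ServerError and its exact constant path is not shown in this diff, so this rescues broadly.

```ruby
# Sketch: surface the friendlier 1.3.0 error text to the user.
# jwt_token, api_key_uuid, model, context, and prompt are the same
# placeholders used in the earlier sketch.
begin
  LlmMetaClient::ServerQuery.new.stream(jwt_token, api_key_uuid, model, context, prompt) do |event|
    print event[:data]["delta"] if event[:event] == "message"
  end
rescue StandardError => e
  # e.message now carries parsed context such as
  # "Rate limit exceeded - check your provider plan ..."
  warn "LLM stream failed: #{e.message}"
end
```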
metadata
CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: llm_meta_client
 version: !ruby/object:Gem::Version
-  version: 1.2.0
+  version: 1.3.0
 platform: ruby
 authors:
 - dhq_boiler

@@ -125,6 +125,7 @@ files:
 - lib/generators/llm_meta_client/scaffold/templates/app/views/chats/_message.html.erb
 - lib/generators/llm_meta_client/scaffold/templates/app/views/chats/_messages_list.html.erb
 - lib/generators/llm_meta_client/scaffold/templates/app/views/chats/_streaming_message.html.erb
+- lib/generators/llm_meta_client/scaffold/templates/app/views/chats/_tool_call_message.html.erb
 - lib/generators/llm_meta_client/scaffold/templates/app/views/chats/create.turbo_stream.erb
 - lib/generators/llm_meta_client/scaffold/templates/app/views/chats/edit.html.erb
 - lib/generators/llm_meta_client/scaffold/templates/app/views/chats/new.html.erb