pikuri-core 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +5 -3
- data/lib/pikuri/agent/chat_transport.rb +135 -11
- data/lib/pikuri/agent/configurator.rb +4 -4
- data/lib/pikuri/agent/context_window_detector.rb +103 -52
- data/lib/pikuri/agent/control/step_limit.rb +39 -7
- data/lib/pikuri/agent/event.rb +43 -16
- data/lib/pikuri/agent/extension.rb +31 -17
- data/lib/pikuri/agent/extension_context.rb +147 -0
- data/lib/pikuri/agent/listener/terminal.rb +13 -2
- data/lib/pikuri/agent/listener/token_log.rb +60 -13
- data/lib/pikuri/agent/listener.rb +12 -5
- data/lib/pikuri/agent/listener_list.rb +7 -17
- data/lib/pikuri/agent/synthesizer.rb +93 -67
- data/lib/pikuri/agent.rb +358 -403
- data/lib/pikuri/sanitizer.rb +179 -0
- data/lib/pikuri/tool/parameters.rb +65 -2
- data/lib/pikuri/tool/search/brave.rb +32 -18
- data/lib/pikuri/tool/search/duckduckgo.rb +18 -7
- data/lib/pikuri/tool/search/engines.rb +72 -49
- data/lib/pikuri/tool/search/exa.rb +34 -22
- data/lib/pikuri/tool/web_search.rb +45 -26
- data/lib/pikuri/version.rb +1 -1
- data/lib/pikuri-core.rb +11 -9
- metadata +5 -6
data/lib/pikuri/agent.rb
CHANGED
|
@@ -18,10 +18,16 @@ module Pikuri
|
|
|
18
18
|
# Two seams are visible:
|
|
19
19
|
#
|
|
20
20
|
# 1. **Listeners** ({ListenerList} + {Listener::Base} subclasses)
|
|
21
|
-
# — pure consumers of the event stream
|
|
22
|
-
#
|
|
23
|
-
#
|
|
24
|
-
#
|
|
21
|
+
# — pure consumers of the event stream; they never write back.
|
|
22
|
+
# The +Agent+ emits every loop-narration {Event} variant;
|
|
23
|
+
# extensions emit their own domain events through the
|
|
24
|
+
# {ExtensionContext} capability facade handed to
|
|
25
|
+
# {Extension#bind}. There is no public path from an +Agent+
|
|
26
|
+
# reference to emission — no +listeners+ reader, no +chat+
|
|
27
|
+
# reader, no emit method — so holding an agent grants read
|
|
28
|
+
# access to its configuration and nothing more. New rendering
|
|
29
|
+
# or capture targets (a web sink, a structured log) are added
|
|
30
|
+
# as listeners without touching {Agent}.
|
|
25
31
|
# 2. **Controls** ({Control::StepLimit}, {Control::Cancellable},
|
|
26
32
|
# {Control::Interloper}) — host-facing signal holders. The
|
|
27
33
|
# +Agent+ reads from them at well-defined boundaries:
|
|
@@ -32,24 +38,32 @@ module Pikuri
|
|
|
32
38
|
# {Control::Interloper#drain!} on every +after_tool_result+.
|
|
33
39
|
#
|
|
34
40
|
# The two roles are named separately so "what fires when" is a
|
|
35
|
-
# single grep for +@listeners.emit+ in this file
|
|
41
|
+
# single grep for +@listeners.emit+ in this file (loop narration)
|
|
42
|
+
# plus the capability calls in {ExtensionContext} (domain events).
|
|
36
43
|
#
|
|
37
|
-
# == Step-exhaustion
|
|
44
|
+
# == Step-exhaustion policy
|
|
38
45
|
#
|
|
39
46
|
# If the +step_limit:+ {Control::StepLimit} trips during
|
|
40
|
-
#
|
|
41
|
-
#
|
|
42
|
-
#
|
|
43
|
-
#
|
|
44
|
-
#
|
|
45
|
-
#
|
|
46
|
-
#
|
|
47
|
-
#
|
|
48
|
-
#
|
|
49
|
-
#
|
|
50
|
-
#
|
|
51
|
-
#
|
|
52
|
-
#
|
|
47
|
+
# completion, {#run_loop} catches the +Exceeded+ exception and
|
|
48
|
+
# applies the budget's {Control::StepLimit#on_exhausted} policy
|
|
49
|
+
# (see that class header for how hosts pick):
|
|
50
|
+
#
|
|
51
|
+
# - +:raise+ (the default) — re-raise to the host, same shape as
|
|
52
|
+
# the cancellation rescue below. The chat history survives and
|
|
53
|
+
# {Control::StepLimit#reset!} fires at the next turn boundary,
|
|
54
|
+
# so a REPL user can simply say "continue".
|
|
55
|
+
# - +:synthesize+ — emit an {Event::FallbackNotice} and run the
|
|
56
|
+
# {Synthesizer} prompt on a nested tools-free +Agent+ (the
|
|
57
|
+
# same construction shape the +agent+ tool from
|
|
58
|
+
# +pikuri-subagents+ uses for sub-agents): parent's listener
|
|
59
|
+
# stream derived via {ListenerList#for_sub_agent} (Terminal
|
|
60
|
+
# padded, TokenLog zeroed, recorder shared by reference),
|
|
61
|
+
# parent's +cancellable+ shared so a user cancel during
|
|
62
|
+
# synthesis still works, a defensive +step_limit+ at +max: 1+
|
|
63
|
+
# (the synth has no tools and shouldn't tick it). The synth's
|
|
64
|
+
# answer becomes the value reported by
|
|
65
|
+
# {#last_assistant_content}, so callers (notably the +agent+
|
|
66
|
+
# tool from +pikuri-subagents+) still get a usable reply.
|
|
53
67
|
#
|
|
54
68
|
# == Cancellation rescue
|
|
55
69
|
#
|
|
@@ -68,253 +82,13 @@ module Pikuri
|
|
|
68
82
|
LOGGER = Pikuri.logger_for('Agent')
|
|
69
83
|
private_constant :LOGGER
|
|
70
84
|
|
|
71
|
-
#
|
|
72
|
-
#
|
|
73
|
-
#
|
|
74
|
-
#
|
|
75
|
-
#
|
|
76
|
-
#
|
|
77
|
-
# (
|
|
78
|
-
# the per-chunk streaming callback is separate because
|
|
79
|
-
# ruby_llm takes it as a block to +Chat#ask+ rather than a
|
|
80
|
-
# registered hook — see {.streaming_block}.
|
|
81
|
-
#
|
|
82
|
-
# @param chat [RubyLLM::Chat] the chat instance to wire
|
|
83
|
-
# @param listeners [ListenerList] the listener stream events
|
|
84
|
-
# flow into
|
|
85
|
-
# @param step_limit [Control::StepLimit, nil] when set,
|
|
86
|
-
# {Control::StepLimit#tick!} is poked on every
|
|
87
|
-
# +before_tool_call+ (and raises {Control::StepLimit::Exceeded}
|
|
88
|
-
# when over budget)
|
|
89
|
-
# @param cancellable [Control::Cancellable, nil] when set,
|
|
90
|
-
# {Control::Cancellable#check!} is poked on every
|
|
91
|
-
# +before_tool_call+ (and raises
|
|
92
|
-
# {Control::Cancellable::Cancelled} when the flag is up)
|
|
93
|
-
# @param interloper [Control::Interloper, nil] when set, the
|
|
94
|
-
# queue is drained on every +after_tool_result+, each item
|
|
95
|
-
# appended as a +role: :user+ message and emitted as
|
|
96
|
-
# {Event::UserTurn} with +mid_loop: true+
|
|
97
|
-
# @param on_user_message [Proc, nil] when set, called with each
|
|
98
|
-
# drained interloper +content+ String *after* it is appended
|
|
99
|
-
# to the chat — the per-turn {Extension#on_user_message}
|
|
100
|
-
# dispatch (prefetch + recording). Threaded through here rather
|
|
101
|
-
# than fired inline so {Synthesizer.run}, which reuses this
|
|
102
|
-
# wiring without an interloper or memory, simply passes +nil+.
|
|
103
|
-
# Only consulted when +interloper+ is also set.
|
|
104
|
-
# @return [void]
|
|
105
|
-
def self.wire_chat(chat, listeners:, step_limit: nil, cancellable: nil, interloper: nil,
|
|
106
|
-
on_user_message: nil)
|
|
107
|
-
chat.after_message do |msg|
|
|
108
|
-
emit_after_message(msg, listeners)
|
|
109
|
-
end
|
|
110
|
-
chat.before_tool_call do |tc|
|
|
111
|
-
listeners.emit(Event::ToolCall.new(name: tc.name, arguments: tc.arguments))
|
|
112
|
-
step_limit&.tick!
|
|
113
|
-
cancellable&.check!
|
|
114
|
-
end
|
|
115
|
-
chat.after_tool_result do |result|
|
|
116
|
-
listeners.emit(Event::ToolResult.new(content: result))
|
|
117
|
-
drain_interloper(interloper, chat, listeners, on_user_message) if interloper
|
|
118
|
-
end
|
|
119
|
-
end
|
|
120
|
-
|
|
121
|
-
# Build the per-chunk streaming block passed to +Chat#ask+.
|
|
122
|
-
# Each invocation of the returned proc converts one
|
|
123
|
-
# +RubyLLM::Chunk+ into zero, one, or two delta events
|
|
124
|
-
# ({Event::ThinkingDelta} / {Event::AssistantDelta}) on
|
|
125
|
-
# +listeners+. Tool-call chunks are intentionally ignored —
|
|
126
|
-
# partial JSON has no useful rendering; the assembled
|
|
127
|
-
# +tool_calls+ surface through {Event::ToolCall} once the
|
|
128
|
-
# message completes.
|
|
129
|
-
#
|
|
130
|
-
# Lives parallel to {.wire_chat} (instead of being folded into
|
|
131
|
-
# it) because +Chat#ask+ takes the streaming block as an
|
|
132
|
-
# argument rather than a registered callback, so both
|
|
133
|
-
# {#run_loop} and {Synthesizer.run} pass it inline at the call
|
|
134
|
-
# site with +&Agent.streaming_block(listeners: ..., cancellable: ...)+.
|
|
135
|
-
#
|
|
136
|
-
# == Cancellation polling
|
|
137
|
-
#
|
|
138
|
-
# When +cancellable+ is non-nil, {Control::Cancellable#check!}
|
|
139
|
-
# fires *before* each chunk's emit. The +before_tool_call+
|
|
140
|
-
# wiring in {.wire_chat} only fires when the model requests a
|
|
141
|
-
# tool, which leaves a no-tool turn (e.g. a plain greeting)
|
|
142
|
-
# with zero cancellation points — Ctrl+C trips the flag but
|
|
143
|
-
# nothing reads it. Polling on every streamed chunk closes
|
|
144
|
-
# that gap: an in-flight +Cancellable#check!+ raises on the
|
|
145
|
-
# next chunk delivered after the flag flips, the exception
|
|
146
|
-
# propagates out through ruby_llm's streaming path
|
|
147
|
-
# (+Chat#ask+ doesn't rescue), and {#run_loop} catches it,
|
|
148
|
-
# emits {Event::Cancelled}, and re-raises. The pre-emit
|
|
149
|
-
# ordering is deliberate: a chunk that arrives after a cancel
|
|
150
|
-
# request shouldn't render — the user has said stop.
|
|
151
|
-
#
|
|
152
|
-
# @param listeners [ListenerList] the listener stream chunk
|
|
153
|
-
# events flow into
|
|
154
|
-
# @param cancellable [Control::Cancellable, nil] when non-nil,
|
|
155
|
-
# polled on every chunk so a flag flipped mid-stream raises
|
|
156
|
-
# {Control::Cancellable::Cancelled} on the very next chunk
|
|
157
|
-
# @return [Proc] a +-> (chunk) { ... }+ proc suitable for
|
|
158
|
-
# passing to +Chat#ask+ with +&+
|
|
159
|
-
def self.streaming_block(listeners:, cancellable: nil)
|
|
160
|
-
->(chunk) {
|
|
161
|
-
cancellable&.check!
|
|
162
|
-
emit_chunk(chunk, listeners)
|
|
163
|
-
}
|
|
164
|
-
end
|
|
165
|
-
|
|
166
|
-
# Normalize a +RubyLLM::Chat+ +after_message+ payload into
|
|
167
|
-
# zero, one, or two {Event} variants (+Thinking+ and/or
|
|
168
|
-
# +Assistant+) plus one {Event::Tokens} for the usage block.
|
|
169
|
-
# Empty thinking / empty content are filtered here so
|
|
170
|
-
# listeners never see vacuous events. Non-assistant roles
|
|
171
|
-
# (e.g. tool-role messages echoed back through
|
|
172
|
-
# +after_message+) are skipped entirely.
|
|
173
|
-
#
|
|
174
|
-
# +msg+ is a +RubyLLM::Message+. Beyond +role+, +content+,
|
|
175
|
-
# +thinking+, and the +*_tokens+ accessors used here, it also
|
|
176
|
-
# carries +msg.tool_calls+ on assistant turns that requested
|
|
177
|
-
# one and +msg.raw+ for the unparsed provider payload.
|
|
178
|
-
#
|
|
179
|
-
# @param msg [RubyLLM::Message]
|
|
180
|
-
# @param listeners [ListenerList]
|
|
181
|
-
# @return [void]
|
|
182
|
-
def self.emit_after_message(msg, listeners)
|
|
183
|
-
return unless msg.role == :assistant
|
|
184
|
-
|
|
185
|
-
text = msg.thinking&.text
|
|
186
|
-
listeners.emit(Event::Thinking.new(content: text)) if text && !text.empty?
|
|
187
|
-
|
|
188
|
-
content = msg.content
|
|
189
|
-
listeners.emit(Event::Assistant.new(content: content)) if content.is_a?(String) && !content.empty?
|
|
190
|
-
|
|
191
|
-
listeners.emit(Event::Tokens.new(
|
|
192
|
-
input: msg.input_tokens,
|
|
193
|
-
output: msg.output_tokens,
|
|
194
|
-
cached: msg.cached_tokens,
|
|
195
|
-
cache_creation: msg.cache_creation_tokens,
|
|
196
|
-
thinking: msg.thinking_tokens,
|
|
197
|
-
model_id: msg.model_id
|
|
198
|
-
))
|
|
199
|
-
end
|
|
200
|
-
private_class_method :emit_after_message
|
|
201
|
-
|
|
202
|
-
# Normalize a +RubyLLM::Chunk+ from a streaming +Chat#ask+
|
|
203
|
-
# into zero, one, or two delta events
|
|
204
|
-
# ({Event::ThinkingDelta} / {Event::AssistantDelta}). Empty
|
|
205
|
-
# +thinking.text+ and empty +content+ are filtered here so
|
|
206
|
-
# listeners never see vacuous fragments. Tool-call deltas are
|
|
207
|
-
# intentionally skipped — see {.streaming_block}.
|
|
208
|
-
#
|
|
209
|
-
# +chunk+ is a +RubyLLM::Chunk+ (subclass of +RubyLLM::Message+),
|
|
210
|
-
# so the same +.thinking+ / +.content+ accessors used in
|
|
211
|
-
# {.emit_after_message} apply.
|
|
212
|
-
#
|
|
213
|
-
# @param chunk [RubyLLM::Chunk]
|
|
214
|
-
# @param listeners [ListenerList]
|
|
215
|
-
# @return [void]
|
|
216
|
-
def self.emit_chunk(chunk, listeners)
|
|
217
|
-
thinking = chunk.thinking&.text
|
|
218
|
-
listeners.emit(Event::ThinkingDelta.new(content: thinking)) if thinking && !thinking.empty?
|
|
219
|
-
|
|
220
|
-
content = chunk.content
|
|
221
|
-
listeners.emit(Event::AssistantDelta.new(content: content)) if content.is_a?(String) && !content.empty?
|
|
222
|
-
end
|
|
223
|
-
private_class_method :emit_chunk
|
|
224
|
-
|
|
225
|
-
# Drain the interloper queue: for each pending item, append a
|
|
226
|
-
# +role: :user+ message to the chat history so the next
|
|
227
|
-
# round-trip sees it, emit an {Event::UserTurn} with
|
|
228
|
-
# +mid_loop: true+ to the listener stream so renderers see the
|
|
229
|
-
# injection, then run the per-turn {Extension#on_user_message}
|
|
230
|
-
# dispatch (so mid-loop injections are prefetched + recorded
|
|
231
|
-
# exactly like initial turns).
|
|
232
|
-
#
|
|
233
|
-
# The dispatch runs *after* the +:user+ append so any
|
|
234
|
-
# +<memory-context>+ it injects lands as a +:system+ message
|
|
235
|
-
# right behind the user turn it annotates — the same
|
|
236
|
-
# append-at-the-tail ordering {#run_loop} produces for initial
|
|
237
|
-
# turns.
|
|
238
|
-
#
|
|
239
|
-
# @param interloper [Control::Interloper]
|
|
240
|
-
# @param chat [RubyLLM::Chat]
|
|
241
|
-
# @param listeners [ListenerList]
|
|
242
|
-
# @param on_user_message [Proc, nil] per-content dispatch; +nil+
|
|
243
|
-
# skips it (e.g. an interloper with no memory extension wired)
|
|
244
|
-
# @return [void]
|
|
245
|
-
def self.drain_interloper(interloper, chat, listeners, on_user_message = nil)
|
|
246
|
-
interloper.drain!.each do |content|
|
|
247
|
-
chat.add_message(role: :user, content: content)
|
|
248
|
-
listeners.emit(Event::UserTurn.new(content: content, mid_loop: true))
|
|
249
|
-
on_user_message&.call(content)
|
|
250
|
-
end
|
|
251
|
-
end
|
|
252
|
-
private_class_method :drain_interloper
|
|
253
|
-
|
|
254
|
-
# One-shot inference. Builds a fresh +RubyLLM::Chat+ with no
|
|
255
|
-
# tools, no MCP, no listeners, no step budget, asks +prompt+ as
|
|
256
|
-
# the single user turn, and returns the assistant's reply as a
|
|
257
|
-
# plain String. Lives parallel to {#initialize} / {#run_loop}
|
|
258
|
-
# because the use case (e.g. summarizing an MCP server's tool
|
|
259
|
-
# set into a short description block before any agent turn
|
|
260
|
-
# runs) is genuinely one-shot — there is no loop, no tool
|
|
261
|
-
# iteration, no listener stream.
|
|
262
|
-
#
|
|
263
|
-
# +prompt+ is sent as the user message. For a one-shot call
|
|
264
|
-
# there is no behavioral difference between the system slot
|
|
265
|
-
# and the user slot, so we use one parameter; pack any
|
|
266
|
-
# "instructions + data" framing into +prompt+ directly.
|
|
267
|
-
#
|
|
268
|
-
# == Cancellation
|
|
269
|
-
#
|
|
270
|
-
# {Control::Cancellable#check!} fires once before the call and
|
|
271
|
-
# once after, so a flag flipped right around the request
|
|
272
|
-
# raises {Control::Cancellable::Cancelled} promptly. The
|
|
273
|
-
# in-flight HTTP call itself is *not* interrupted — same
|
|
274
|
-
# "gentle cancel" semantic the main loop offers (see
|
|
275
|
-
# {Control::Cancellable}'s class header). For 30s synthesis
|
|
276
|
-
# passes at boot this is still a useful escape hatch: the next
|
|
277
|
-
# check raises and the call returns.
|
|
278
|
-
#
|
|
279
|
-
# == Failure
|
|
280
|
-
#
|
|
281
|
-
# Errors from the provider (HTTP failure, malformed response,
|
|
282
|
-
# +RubyLLM+ raising) propagate to the caller verbatim — there
|
|
283
|
-
# is no recovery layer here. Callers that want "fail soft on
|
|
284
|
-
# synthesis errors" (e.g. {Mcp::Servers}) rescue at their level
|
|
285
|
-
# and fall back to a default; this method stays loud.
|
|
286
|
-
#
|
|
287
|
-
# @param transport [ChatTransport] same model-resolution
|
|
288
|
-
# triple {#initialize} uses; if +model+ is +nil+, falls
|
|
289
|
-
# back to +RubyLLM.config.default_model+
|
|
290
|
-
# @param prompt [String] the prompt sent as the single user
|
|
291
|
-
# turn; must be non-blank
|
|
292
|
-
# @param cancellable [Control::Cancellable, nil] when set,
|
|
293
|
-
# checked before the call so a flag flipped right
|
|
294
|
-
# around the request raises {Control::Cancellable::Cancelled}
|
|
295
|
-
# @return [String] the assistant's reply content
|
|
296
|
-
# @raise [ArgumentError] when +prompt+ is +nil+, empty, or
|
|
297
|
-
# whitespace-only
|
|
298
|
-
# @raise [Control::Cancellable::Cancelled] when the
|
|
299
|
-
# +cancellable+ flag was tripped at the pre-call check
|
|
300
|
-
def self.think(transport:, prompt:, cancellable: nil)
|
|
301
|
-
raise ArgumentError, "prompt must not be blank, got #{prompt.inspect}" \
|
|
302
|
-
if prompt.nil? || prompt.to_s.strip.empty?
|
|
303
|
-
|
|
304
|
-
transport = transport.with(model: RubyLLM.config.default_model) unless transport.model
|
|
305
|
-
cancellable&.check!
|
|
306
|
-
chat = RubyLLM.chat(**transport.to_h)
|
|
307
|
-
chat.ask(prompt)
|
|
308
|
-
last = chat.messages.reverse.find { |m| m.role == :assistant }
|
|
309
|
-
last&.content.to_s
|
|
310
|
-
end
|
|
311
|
-
|
|
312
|
-
# @param transport [ChatTransport] the model-resolution triple
|
|
313
|
-
# (+model+ / +provider+ / +assume_model_exists+) forwarded
|
|
314
|
-
# to +RubyLLM.chat+. Bundled into one value object so every
|
|
315
|
-
# construction site — this constructor and the synthesizer
|
|
316
|
-
# rescue below — can forward all three with one assignment
|
|
317
|
-
# instead of three kwargs (where dropping one would silently
|
|
85
|
+
# @param transport [ChatTransport] the model-resolution bundle
|
|
86
|
+
# (+model+ / +provider+ / +assume_model_exists+ and, for a model
|
|
87
|
+
# on a non-global server, +api_base+ / +api_key+) the chat is
|
|
88
|
+
# built from. Bundled into one value object so every construction
|
|
89
|
+
# site — this constructor, the synthesizer rescue below, a
|
|
90
|
+
# mid-conversation switch — can forward it with one assignment
|
|
91
|
+
# instead of loose kwargs (where dropping one would silently
|
|
318
92
|
# route the chat elsewhere or raise
|
|
319
93
|
# +RubyLLM::ModelNotFoundError+). If +transport.model+ is
|
|
320
94
|
# +nil+, it's filled in from +RubyLLM.config.default_model+.
|
|
@@ -326,7 +100,10 @@ module Pikuri
|
|
|
326
100
|
# @param step_limit [Control::StepLimit, nil] step budget
|
|
327
101
|
# control. When set, {Control::StepLimit#tick!} fires on
|
|
328
102
|
# every +before_tool_call+ and {Control::StepLimit#reset!}
|
|
329
|
-
# at the start of each turn
|
|
103
|
+
# at the start of each turn; the budget's
|
|
104
|
+
# {Control::StepLimit#on_exhausted} policy decides what
|
|
105
|
+
# {#run_loop} does when it trips (see "Step-exhaustion
|
|
106
|
+
# policy" in the class header). +nil+ means "no step budget"
|
|
330
107
|
# (the agent can loop indefinitely).
|
|
331
108
|
# @param cancellable [Control::Cancellable, nil] cancellation
|
|
332
109
|
# control. When set, {Control::Cancellable#check!} fires on
|
|
@@ -339,18 +116,6 @@ module Pikuri
|
|
|
339
116
|
# every +after_tool_result+ and each item becomes a
|
|
340
117
|
# {Event::UserTurn} with +mid_loop: true+. +nil+ means
|
|
341
118
|
# "no mid-loop injection" (the bundled CLIs default).
|
|
342
|
-
# @param context_window [Integer, nil] explicit override for
|
|
343
|
-
# the model's context-window cap. When set, it wins over
|
|
344
|
-
# ruby_llm's reported value and the llama.cpp probe — see
|
|
345
|
-
# {ContextWindowDetector} for precedence. Resolved cap is
|
|
346
|
-
# emitted as an {Event::ContextCap} immediately after
|
|
347
|
-
# construction.
|
|
348
|
-
# @param llama_probe_url [String, nil] llama.cpp +/props+ URL
|
|
349
|
-
# used as the third detection source. Only consulted when
|
|
350
|
-
# neither +context_window+ nor ruby_llm's reported value is
|
|
351
|
-
# set. Typically derived by +bin/pikuri-chat+ from its
|
|
352
|
-
# configured +openai_api_base+; leave +nil+ when the
|
|
353
|
-
# configured server is anything other than llama.cpp.
|
|
354
119
|
# @param id [String] unique identifier for this agent. Empty
|
|
355
120
|
# for the main agent; sub-agents get persona-rooted ids
|
|
356
121
|
# like +"researcher 0"+, +"researcher 1"+, +"file_miner 0"+, ...
|
|
@@ -362,8 +127,8 @@ module Pikuri
|
|
|
362
127
|
# the codebase for the persona-name load (the value the LLM
|
|
363
128
|
# picks in the +agent+ tool's +name:+ argument).
|
|
364
129
|
# @param streaming [Boolean] opt into chunk-level streaming.
|
|
365
|
-
# When +true+, {#run_loop} passes
|
|
366
|
-
#
|
|
130
|
+
# When +true+, {#run_loop} passes a per-chunk block to
|
|
131
|
+
# +Chat#complete+, and ruby_llm requests
|
|
367
132
|
# SSE responses from the provider — chunks are normalized
|
|
368
133
|
# into {Event::ThinkingDelta} / {Event::AssistantDelta} on
|
|
369
134
|
# the listener stream as they arrive. When +false+ (the
|
|
@@ -386,7 +151,7 @@ module Pikuri
|
|
|
386
151
|
# @return [Agent]
|
|
387
152
|
def initialize(transport:, system_prompt:,
|
|
388
153
|
step_limit: nil, cancellable: nil, interloper: nil,
|
|
389
|
-
|
|
154
|
+
id: '',
|
|
390
155
|
streaming: false,
|
|
391
156
|
&block)
|
|
392
157
|
@transport = transport.model ? transport : transport.with(model: RubyLLM.config.default_model)
|
|
@@ -403,8 +168,6 @@ module Pikuri
|
|
|
403
168
|
# Stashed for {#run_configure}, which runs the failure-prone
|
|
404
169
|
# build phase below out of a separate method.
|
|
405
170
|
@block = block
|
|
406
|
-
@context_window = context_window
|
|
407
|
-
@llama_probe_url = llama_probe_url
|
|
408
171
|
|
|
409
172
|
# Register *before* the build phase so a mid-construction raise
|
|
410
173
|
# is still recoverable: extensions arm their cleanup via
|
|
@@ -427,9 +190,6 @@ module Pikuri
|
|
|
427
190
|
end
|
|
428
191
|
end
|
|
429
192
|
|
|
430
|
-
# @return [RubyLLM::Chat] underlying chat; the extension seam
|
|
431
|
-
attr_reader :chat
|
|
432
|
-
|
|
433
193
|
# @return [ChatTransport] the resolved transport bundle this
|
|
434
194
|
# agent was constructed with — same model id / provider /
|
|
435
195
|
# assume-model-exists flag passed to every +RubyLLM.chat+
|
|
@@ -473,10 +233,6 @@ module Pikuri
|
|
|
473
233
|
# each persona owns its own system prompt verbatim.
|
|
474
234
|
attr_reader :system_prompt
|
|
475
235
|
|
|
476
|
-
# @return [ListenerList] the listener list attached to this
|
|
477
|
-
# agent's chat
|
|
478
|
-
attr_reader :listeners
|
|
479
|
-
|
|
480
236
|
# @return [Control::StepLimit, nil] the step-budget control
|
|
481
237
|
# this agent was constructed with, or +nil+ when none.
|
|
482
238
|
attr_reader :step_limit
|
|
@@ -521,13 +277,15 @@ module Pikuri
|
|
|
521
277
|
# extensions).
|
|
522
278
|
attr_reader :extensions
|
|
523
279
|
|
|
524
|
-
# @return [Integer, nil] context-window cap
|
|
525
|
-
# {
|
|
526
|
-
#
|
|
527
|
-
#
|
|
528
|
-
#
|
|
529
|
-
#
|
|
530
|
-
#
|
|
280
|
+
# @return [Integer, nil] resolved context-window cap — the
|
|
281
|
+
# {ChatTransport#context_window} if one was given, else what
|
|
282
|
+
# {ContextWindowDetector} probed. +nil+ when neither produced
|
|
283
|
+
# a value (a non-llama server with no explicit cap). Re-resolved
|
|
284
|
+
# on every model switch (see {#run_loop}'s +transport:+). Read by
|
|
285
|
+
# extensions that spawn their own ruby_llm calls (notably the
|
|
286
|
+
# +agent+ tool from +pikuri-subagents+, which hands a sub-agent
|
|
287
|
+
# +parent.transport.with(context_window: this)+ so the resolved
|
|
288
|
+
# cap rides along without a re-probe).
|
|
531
289
|
attr_reader :context_window_cap
|
|
532
290
|
|
|
533
291
|
# Final assistant message content for the most recent
|
|
@@ -552,11 +310,15 @@ module Pikuri
|
|
|
552
310
|
# and any other observable output is the listeners'
|
|
553
311
|
# responsibility.
|
|
554
312
|
#
|
|
555
|
-
# If the +step_limit+ control trips during
|
|
556
|
-
# branch
|
|
557
|
-
#
|
|
558
|
-
#
|
|
559
|
-
#
|
|
313
|
+
# If the +step_limit+ control trips during completion, the
|
|
314
|
+
# rescue branch applies its {Control::StepLimit#on_exhausted}
|
|
315
|
+
# policy: +:raise+ re-raises the +Exceeded+ exception to the
|
|
316
|
+
# host (chat history intact — the next turn's +reset!+
|
|
317
|
+
# refreshes the budget, so "continue" just works);
|
|
318
|
+
# +:synthesize+ emits an {Event::FallbackNotice} and runs the
|
|
319
|
+
# {Synthesizer} prompt on a nested tools-free agent, capturing
|
|
320
|
+
# its answer for {#last_assistant_content}. See
|
|
321
|
+
# "Step-exhaustion policy" in the class header.
|
|
560
322
|
#
|
|
561
323
|
# If the +cancellable+ control trips during completion, the rescue
|
|
562
324
|
# branch emits an {Event::Cancelled} and re-raises the
|
|
@@ -566,19 +328,45 @@ module Pikuri
|
|
|
566
328
|
# Subsequent calls keep building on the same chat history, so
|
|
567
329
|
# the model sees full multi-turn context.
|
|
568
330
|
#
|
|
331
|
+
# == Switching models mid-conversation
|
|
332
|
+
#
|
|
333
|
+
# Passing a +transport:+ that differs from the current one
|
|
334
|
+
# switches the underlying chat to that model — via
|
|
335
|
+
# +Chat#with_model+, so the history and the registered
|
|
336
|
+
# callbacks survive — re-resolves the context-window cap, and
|
|
337
|
+
# emits an {Event::ModelSwitched} followed by a fresh
|
|
338
|
+
# {Event::ContextCap}. The switch is deliberately confined to
|
|
339
|
+
# the top of this method (a private +apply_transport!+) rather
|
|
340
|
+
# than exposed as a standalone setter: the chat is
|
|
341
|
+
# single-thread-confined, so doing it here serializes the swap
|
|
342
|
+
# with the turn on the loop's own thread — a background thread
|
|
343
|
+
# mutating +with_model+'s connection mid-completion would tear
|
|
344
|
+
# an in-flight stream. A +nil+ +transport:+ (the default) keeps
|
|
345
|
+
# the current model. The conversation is *not* re-baselined: a
|
|
346
|
+
# switch is the same conversation under a new model, so the
|
|
347
|
+
# message count and running context size carry over (the next
|
|
348
|
+
# turn's token report self-corrects to the new model's count).
|
|
349
|
+
#
|
|
569
350
|
# @param user_message [String] the user's request for this
|
|
570
351
|
# turn; must not be +nil+, empty, or whitespace-only
|
|
352
|
+
# @param transport [ChatTransport, nil] when non-+nil+ and
|
|
353
|
+
# structurally different from the current transport, switch to
|
|
354
|
+
# it before running the turn (see above); +nil+ keeps the
|
|
355
|
+
# current model
|
|
571
356
|
# @raise [ArgumentError] if +user_message+ is +nil+, empty,
|
|
572
357
|
# or contains only whitespace — an empty turn would poison
|
|
573
358
|
# the chat history and burn a step budget on nothing
|
|
574
359
|
# @raise [Control::Cancellable::Cancelled] if the registered
|
|
575
360
|
# {Control::Cancellable} was triggered during the turn;
|
|
576
361
|
# the listener stream sees an {Event::Cancelled} first
|
|
362
|
+
# @raise [Control::StepLimit::Exceeded] if the step budget
|
|
363
|
+
# tripped and its policy is +:raise+ (the default)
|
|
577
364
|
# @return [nil]
|
|
578
|
-
def run_loop(user_message:)
|
|
365
|
+
def run_loop(user_message:, transport: nil)
|
|
579
366
|
raise ArgumentError, "user_message must not be blank, got #{user_message.inspect}" \
|
|
580
367
|
if user_message.nil? || user_message.to_s.strip.empty?
|
|
581
368
|
|
|
369
|
+
apply_transport!(transport) if transport
|
|
582
370
|
@synth_answer = nil
|
|
583
371
|
@step_limit&.reset!
|
|
584
372
|
@cancellable&.reset!
|
|
@@ -594,7 +382,7 @@ module Pikuri
|
|
|
594
382
|
@listeners.emit(Event::UserTurn.new(content: user_message, mid_loop: false))
|
|
595
383
|
dispatch_ext_on_user_message(user_message)
|
|
596
384
|
if @streaming
|
|
597
|
-
@chat.complete(&
|
|
385
|
+
@chat.complete(&streaming_block)
|
|
598
386
|
else
|
|
599
387
|
@chat.complete
|
|
600
388
|
end
|
|
@@ -603,42 +391,16 @@ module Pikuri
|
|
|
603
391
|
@listeners.emit(Event::Cancelled.new)
|
|
604
392
|
raise
|
|
605
393
|
rescue Control::StepLimit::Exceeded => e
|
|
606
|
-
@
|
|
607
|
-
reason: "agent exhausted #{e.max_steps} steps; synthesizing answer from gathered evidence"
|
|
608
|
-
))
|
|
394
|
+
raise unless @step_limit&.on_exhausted == :synthesize
|
|
609
395
|
|
|
610
|
-
|
|
611
|
-
# chat with a different system prompt, so it gets a
|
|
612
|
-
# distinct +_synthesizer+ suffix on the id — same +_+
|
|
613
|
-
# separator the sub-agent generator uses, so main becomes
|
|
614
|
-
# +"synthesizer"+ and a sub-agent +"researcher 0"+ becomes
|
|
615
|
-
# +"researcher 0_synthesizer"+. Any +TokenLog+ in the list
|
|
616
|
-
# tags the synth's prompt under that bracket so it's
|
|
617
|
-
# obvious from the log which turns were the rescue rather
|
|
618
|
-
# than the original loop.
|
|
619
|
-
synth_id = @id.empty? ? 'synthesizer' : "#{@id}_synthesizer"
|
|
620
|
-
synth_chat = RubyLLM.chat(**@transport.to_h)
|
|
621
|
-
# Defensive step limit on the synth: the synth has no
|
|
622
|
-
# tools so it should never trip +before_tool_call+, but
|
|
623
|
-
# guarding the budget anyway means a buggy provider that
|
|
624
|
-
# somehow returns a tool call doesn't loop forever.
|
|
625
|
-
synth_step_limit = @step_limit && Control::StepLimit.new(max: 1)
|
|
626
|
-
@synth_answer = Synthesizer.run(
|
|
627
|
-
chat: synth_chat,
|
|
628
|
-
parent_messages: @chat.messages,
|
|
629
|
-
user_message: user_message,
|
|
630
|
-
listeners: @listeners.for_sub_agent(id: synth_id),
|
|
631
|
-
step_limit: synth_step_limit,
|
|
632
|
-
cancellable: @cancellable,
|
|
633
|
-
streaming: @streaming
|
|
634
|
-
)
|
|
396
|
+
@synth_answer = Synthesizer.run_synthesizer(@extension_context, @chat.messages, user_message)
|
|
635
397
|
nil
|
|
636
398
|
end
|
|
637
399
|
|
|
638
400
|
# Release agent-owned resources. Fires every handler registered
|
|
639
401
|
# via {Configurator#on_close} (during the +Agent.new+ block) and
|
|
640
|
-
# {#on_close} (during {Extension#bind} or any
|
|
641
|
-
#
|
|
402
|
+
# {ExtensionContext#on_close} (during {Extension#bind} or any
|
|
403
|
+
# later hook), in LIFO order — matches Ruby +ensure+-block semantics
|
|
642
404
|
# so handlers registered later (which may depend on handlers
|
|
643
405
|
# registered earlier) tear down first. Each handler runs inside
|
|
644
406
|
# its own +rescue+; an exception is logged via
|
|
@@ -661,46 +423,6 @@ module Pikuri
|
|
|
661
423
|
end
|
|
662
424
|
end
|
|
663
425
|
|
|
664
|
-
# Register a handler called by {#close}. Symmetric to
|
|
665
|
-
# {Configurator#on_close} — same LIFO + per-handler-rescue +
|
|
666
|
-
# idempotent semantics — but available post-construction, so
|
|
667
|
-
# an {Extension}'s +bind(agent)+ can install per-agent cleanup
|
|
668
|
-
# that's keyed to this specific agent rather than the parent.
|
|
669
|
-
#
|
|
670
|
-
# @yield called with no arguments at close time
|
|
671
|
-
# @return [void]
|
|
672
|
-
def on_close(&blk)
|
|
673
|
-
raise ArgumentError, 'on_close requires a block' unless block_given?
|
|
674
|
-
|
|
675
|
-
@on_close_handlers << blk
|
|
676
|
-
nil
|
|
677
|
-
end
|
|
678
|
-
|
|
679
|
-
# Register a raw +RubyLLM::Tool+ subclass on this agent's
|
|
680
|
-
# underlying chat, bypassing the {Pikuri::Tool} strict-validation
|
|
681
|
-
# seam. Sole intended caller: {Mcp::Servers::Connect}, which uses
|
|
682
|
-
# this to lazy-add MCP-exposed tools after the LLM invokes
|
|
683
|
-
# +mcp_connect+ in a turn.
|
|
684
|
-
#
|
|
685
|
-
# The +internal_+ prefix is the warning: native pikuri tools
|
|
686
|
-
# should go through {Pikuri::Tool} so they get
|
|
687
|
-
# {Tool::Parameters} validation and the LLM-actionable
|
|
688
|
-
# +"Error: ..."+ contract. MCP tools deliberately don't — see
|
|
689
|
-
# IDEAS.md §"v1 implementation shape" / "MCP tools bypass
|
|
690
|
-
# +Pikuri::Tool+ entirely."
|
|
691
|
-
#
|
|
692
|
-
# The added tool does NOT enter +@tools+, only +@chat+'s tool
|
|
693
|
-
# list. Sub-agents (the +agent+ tool from +pikuri-subagents+)
|
|
694
|
-
# therefore cannot snapshot it — which is the whole point:
|
|
695
|
-
# activation is strictly per-agent, see IDEAS.md §"Per-agent
|
|
696
|
-
# activation, no propagation".
|
|
697
|
-
#
|
|
698
|
-
# @param ruby_llm_tool [Class] subclass of +RubyLLM::Tool+
|
|
699
|
-
# @return [void]
|
|
700
|
-
def internal_add_tool(ruby_llm_tool)
|
|
701
|
-
@chat.with_tool(ruby_llm_tool)
|
|
702
|
-
end
|
|
703
|
-
|
|
704
426
|
# Short, single-line config dump suitable for a startup
|
|
705
427
|
# banner or a debug print.
|
|
706
428
|
#
|
|
@@ -748,42 +470,164 @@ module Pikuri
|
|
|
748
470
|
end
|
|
749
471
|
@extensions = configurator.extensions.dup
|
|
750
472
|
|
|
751
|
-
@chat =
|
|
473
|
+
@chat = build_chat(@transport)
|
|
752
474
|
@chat.with_instructions(@system_prompt)
|
|
753
475
|
@tools.each { |t| @chat.with_tool(t.to_ruby_llm_tool) }
|
|
754
476
|
|
|
755
|
-
@
|
|
756
|
-
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
760
|
-
|
|
477
|
+
# Wire @chat for pikuri's event stream and controls — the
|
|
478
|
+
# three message-level registered callbacks (+after_message+,
|
|
479
|
+
# +before_tool_call+, +after_tool_result+). The per-chunk
|
|
480
|
+
# streaming callback is separate because ruby_llm takes it as
|
|
481
|
+
# a block to +Chat#complete+ rather than a registered hook —
|
|
482
|
+
# see {#streaming_block}. Together with the +@listeners.emit+
|
|
483
|
+
# calls in {#run_loop} / {#dispatch_ext_on_user_message} this
|
|
484
|
+
# is the complete "which callback emits which event" map.
|
|
485
|
+
@chat.after_message do |msg|
|
|
486
|
+
emit_after_message(msg)
|
|
487
|
+
end
|
|
488
|
+
@chat.before_tool_call do |tc|
|
|
489
|
+
@listeners.emit(Event::ToolCall.new(name: tc.name, arguments: tc.arguments))
|
|
490
|
+
@step_limit&.tick!
|
|
491
|
+
@cancellable&.check!
|
|
492
|
+
end
|
|
493
|
+
@chat.after_tool_result do |result|
|
|
494
|
+
@listeners.emit(Event::ToolResult.new(content: result))
|
|
495
|
+
drain_interloper if @interloper
|
|
496
|
+
end
|
|
761
497
|
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
498
|
+
# Context-window cap: lets every listener that cares (notably
|
|
499
|
+
# TokenLog) pick the value off the stream before any Tokens
|
|
500
|
+
# event arrives. Re-fires on each model switch (see
|
|
501
|
+
# {#apply_transport!}).
|
|
502
|
+
detect_and_emit_context_cap!
|
|
503
|
+
|
|
504
|
+
# The runtime capability facade — constructed once, after the
|
|
505
|
+
# chat and listener list are final, and handed to every
|
|
506
|
+
# extension's #bind / #on_user_message. The ONLY object that
|
|
507
|
+
# grants emission / raw-tool-registration / close-handler
|
|
508
|
+
# capabilities; the Agent itself exposes no public path to
|
|
509
|
+
# them. See {ExtensionContext}.
|
|
510
|
+
@extension_context = ExtensionContext.new(
|
|
511
|
+
agent: self, chat: @chat, listeners: @listeners,
|
|
512
|
+
on_close_sink: @on_close_handlers
|
|
769
513
|
)
|
|
770
514
|
|
|
771
|
-
#
|
|
772
|
-
#
|
|
773
|
-
#
|
|
515
|
+
# Bind sweep — each extension gets its chance to install
|
|
516
|
+
# per-agent state (dynamic tools via
|
|
517
|
+
# {ExtensionContext#add_raw_tool}, per-agent close hooks via
|
|
518
|
+
# {ExtensionContext#on_close}, domain-event wiring via
|
|
519
|
+
# {ExtensionContext#emit_event}, etc.) now that the chat is
|
|
520
|
+
# fully wired. See IDEAS.md §"Extension protocol design" for
|
|
521
|
+
# what #configure vs #bind are each for.
|
|
522
|
+
@extensions.each { |ext| ext.bind(@extension_context) }
|
|
523
|
+
end
|
|
524
|
+
|
|
525
|
+
# Resolve the context-window cap and announce it on the listener
|
|
526
|
+
# stream as an {Event::ContextCap}. The transport's explicit
|
|
527
|
+
# +context_window+ wins verbatim; otherwise {ContextWindowDetector}
|
|
528
|
+
# probes the server (yielding +nil+ for a non-llama one). Because the
|
|
529
|
+
# cap rides {ChatTransport}, a model switch resolves the *new*
|
|
530
|
+
# transport's cap — explicit caps don't bleed across models. Shared
|
|
531
|
+
# by {#run_configure} (construction) and {#apply_transport!} (each
|
|
532
|
+
# model switch) so "how the cap is resolved and emitted" lives in
|
|
533
|
+
# one place.
|
|
534
|
+
#
|
|
535
|
+
# @return [void]
|
|
536
|
+
def detect_and_emit_context_cap!
|
|
537
|
+
# Probe the server this transport actually targets — after a
|
|
538
|
+
# cross-server switch the chat's connection points at
|
|
539
|
+
# +@transport.api_base+, but the process-global config the
|
|
540
|
+
# detector defaults to still names the *old* server, so derive the
|
|
541
|
+
# base from the transport (falling back to the global base for a
|
|
542
|
+
# transport that rides it).
|
|
543
|
+
@context_window_cap = @transport.context_window ||
|
|
544
|
+
ContextWindowDetector.detect(
|
|
545
|
+
@transport,
|
|
546
|
+
openai_base: @transport.api_base || RubyLLM.config.openai_api_base
|
|
547
|
+
)
|
|
774
548
|
@listeners.emit(Event::ContextCap.new(cap: @context_window_cap))
|
|
549
|
+
end
|
|
775
550
|
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
551
|
+
# Switch the underlying chat to +transport+ when it differs from
|
|
552
|
+
# the current one. Called only from the top of {#run_loop} — see
|
|
553
|
+
# that method's "Switching models mid-conversation" section for why
|
|
554
|
+
# the swap is confined to the loop's own thread.
|
|
555
|
+
#
|
|
556
|
+
# Mirrors {#initialize}'s +nil+-model fill before the structural
|
|
557
|
+
# comparison, so a +transport+ that defers its model to the default
|
|
558
|
+
# doesn't read as "different" and switch spuriously.
|
|
559
|
+
# +Chat#with_model+ swaps only the model / provider / connection,
|
|
560
|
+
# leaving +@chat+'s message history and registered callbacks
|
|
561
|
+
# intact, so the conversation continues seamlessly under the new
|
|
562
|
+
# model. Emits {Event::ModelSwitched} (the narration: old → new
|
|
563
|
+
# transport, unformatted, for the chrome to present) then re-resolves
|
|
564
|
+
# and re-emits the cap via {#detect_and_emit_context_cap!}.
|
|
565
|
+
#
|
|
566
|
+
# == Cross-server switches
|
|
567
|
+
#
|
|
568
|
+
# +with_model+ alone re-resolves against the chat's *existing*
|
|
569
|
+
# connection, so it can only move between models on one server. When
|
|
570
|
+
# either the new or the old transport overrides the connection
|
|
571
|
+
# (+ChatTransport#connection_overrides?+ — a different +api_base+ /
|
|
572
|
+
# +api_key+), the swap first installs a fresh +RubyLLM::Context+ via
|
|
573
|
+
# +Chat#with_context+ (which re-points the connection and re-resolves
|
|
574
|
+
# the model in place, again preserving history + callbacks) before
|
|
575
|
+
# +with_model+ lands the new model id. The "old overrode" half of
|
|
576
|
+
# the guard handles switching *back* to a global-config model: the
|
|
577
|
+
# rebuilt context dups the process-global config, resetting the
|
|
578
|
+
# connection the previous override installed.
|
|
579
|
+
#
|
|
580
|
+
# @param transport [ChatTransport] the model to switch to
|
|
581
|
+
# @return [void]
|
|
582
|
+
def apply_transport!(transport)
|
|
583
|
+
filled = transport.model ? transport : transport.with(model: RubyLLM.config.default_model)
|
|
584
|
+
return if filled == @transport
|
|
585
|
+
|
|
586
|
+
old = @transport
|
|
587
|
+
@chat.with_context(build_context(filled)) if filled.connection_overrides? || old.connection_overrides?
|
|
588
|
+
@chat.with_model(filled.model, provider: filled.provider, assume_exists: filled.assume_model_exists)
|
|
589
|
+
@transport = filled
|
|
590
|
+
@listeners.emit(Event::ModelSwitched.new(from: old, to: filled))
|
|
591
|
+
detect_and_emit_context_cap!
|
|
592
|
+
end
|
|
593
|
+
|
|
594
|
+
# Build the chat for +transport+: through a dedicated
|
|
595
|
+
# +RubyLLM::Context+ when it overrides the connection, else through
|
|
596
|
+
# the process-global +RubyLLM.chat+ (which the construction-time
|
|
597
|
+
# path has always used).
|
|
598
|
+
#
|
|
599
|
+
# @param transport [ChatTransport]
|
|
600
|
+
# @return [RubyLLM::Chat]
|
|
601
|
+
def build_chat(transport)
|
|
602
|
+
if transport.connection_overrides?
|
|
603
|
+
build_context(transport).chat(**transport.chat_kwargs)
|
|
604
|
+
else
|
|
605
|
+
RubyLLM.chat(**transport.chat_kwargs)
|
|
606
|
+
end
|
|
607
|
+
end
|
|
608
|
+
|
|
609
|
+
# A +RubyLLM::Context+ carrying +transport+'s connection overrides
|
|
610
|
+
# mapped onto the provider's ruby_llm config slots
|
|
611
|
+
# (+#{provider}_api_base+ / +#{provider}_api_key+). +RubyLLM.context+
|
|
612
|
+
# dups the process-global config, so an absent override inherits the
|
|
613
|
+
# global value; a transport with no overrides yields a plain dup
|
|
614
|
+
# (used by {#apply_transport!} to reset a prior override). The
|
|
615
|
+
# +ChatTransport+ guarantees a non-+nil+ +provider+ whenever an
|
|
616
|
+
# override is set, so the slot name is always resolvable.
|
|
617
|
+
#
|
|
618
|
+
# @param transport [ChatTransport]
|
|
619
|
+
# @return [RubyLLM::Context]
|
|
620
|
+
def build_context(transport)
|
|
621
|
+
slug = transport.provider
|
|
622
|
+
RubyLLM.context do |c|
|
|
623
|
+
c.public_send("#{slug}_api_base=", transport.api_base) unless transport.api_base.nil?
|
|
624
|
+
c.public_send("#{slug}_api_key=", transport.api_key) unless transport.api_key.nil?
|
|
625
|
+
end
|
|
782
626
|
end
|
|
783
627
|
|
|
784
628
|
# Fire the per-turn {Extension#on_user_message} hook on every
|
|
785
629
|
# extension that defines it, appending any returned
|
|
786
|
-
#
|
|
630
|
+
# String text block to the chat as a +role: :system+
|
|
787
631
|
# message right after the user turn it annotates (callers append
|
|
788
632
|
# the +:user+ message first; this runs last). The system role is
|
|
789
633
|
# load-bearing — it tags the block as recalled reference (not new
|
|
@@ -797,9 +641,8 @@ module Pikuri
|
|
|
797
641
|
#
|
|
798
642
|
# Private and the single place the chat log grows by a memory
|
|
799
643
|
# block — keeps "what mutates the log, when" one grep in this
|
|
800
|
-
# file. Fired from {#run_loop} (initial turn) and
|
|
801
|
-
#
|
|
802
|
-
# {.drain_interloper} (mid-loop interlopers). Called on every
|
|
644
|
+
# file. Fired from {#run_loop} (initial turn) and from
|
|
645
|
+
# {#drain_interloper} (mid-loop interlopers). Called on every
|
|
803
646
|
# extension unconditionally — same as {Extension#configure} /
|
|
804
647
|
# {Extension#bind}: the hook is part of the protocol and the
|
|
805
648
|
# {Extension} module supplies a no-op default, so any extension
|
|
@@ -811,7 +654,7 @@ module Pikuri
|
|
|
811
654
|
# @return [void]
|
|
812
655
|
def dispatch_ext_on_user_message(content)
|
|
813
656
|
@extensions.each do |ext|
|
|
814
|
-
message = ext.on_user_message(
|
|
657
|
+
message = ext.on_user_message(@extension_context, content)
|
|
815
658
|
next unless message.is_a?(String) && !message.strip.empty?
|
|
816
659
|
|
|
817
660
|
block = message.strip
|
|
@@ -820,5 +663,117 @@ module Pikuri
|
|
|
820
663
|
end
|
|
821
664
|
nil
|
|
822
665
|
end
|
|
666
|
+
|
|
667
|
+
# Build the per-chunk streaming block passed to +Chat#complete+.
|
|
668
|
+
# Each invocation of the returned proc converts one
|
|
669
|
+
# +RubyLLM::Chunk+ into zero, one, or two delta events
|
|
670
|
+
# ({Event::ThinkingDelta} / {Event::AssistantDelta}) on
|
|
671
|
+
# +@listeners+. Tool-call chunks are intentionally ignored —
|
|
672
|
+
# partial JSON has no useful rendering; the assembled
|
|
673
|
+
# +tool_calls+ surface through {Event::ToolCall} once the
|
|
674
|
+
# message completes.
|
|
675
|
+
#
|
|
676
|
+
# == Cancellation polling
|
|
677
|
+
#
|
|
678
|
+
# When +@cancellable+ is non-nil, {Control::Cancellable#check!}
|
|
679
|
+
# fires *before* each chunk's emit. The +before_tool_call+
|
|
680
|
+
# wiring in {#run_configure} only fires when the model requests a
|
|
681
|
+
# tool, which leaves a no-tool turn (e.g. a plain greeting)
|
|
682
|
+
# with zero cancellation points — Ctrl+C trips the flag but
|
|
683
|
+
# nothing reads it. Polling on every streamed chunk closes
|
|
684
|
+
# that gap: an in-flight +Cancellable#check!+ raises on the
|
|
685
|
+
# next chunk delivered after the flag flips, the exception
|
|
686
|
+
# propagates out through ruby_llm's streaming path
|
|
687
|
+
# (+Chat#complete+ doesn't rescue), and {#run_loop} catches it,
|
|
688
|
+
# emits {Event::Cancelled}, and re-raises. The pre-emit
|
|
689
|
+
# ordering is deliberate: a chunk that arrives after a cancel
|
|
690
|
+
# request shouldn't render — the user has said stop.
|
|
691
|
+
#
|
|
692
|
+
# @return [Proc] a +-> (chunk) { ... }+ proc suitable for
|
|
693
|
+
# passing to +Chat#complete+ with +&+
|
|
694
|
+
def streaming_block
|
|
695
|
+
->(chunk) {
|
|
696
|
+
@cancellable&.check!
|
|
697
|
+
emit_chunk(chunk)
|
|
698
|
+
}
|
|
699
|
+
end
|
|
700
|
+
|
|
701
|
+
# Normalize a +RubyLLM::Chat+ +after_message+ payload into
|
|
702
|
+
# zero, one, or two {Event} variants (+Thinking+ and/or
|
|
703
|
+
# +Assistant+) plus one {Event::Tokens} for the usage block.
|
|
704
|
+
# Empty thinking / empty content are filtered here so
|
|
705
|
+
# listeners never see vacuous events. Non-assistant roles
|
|
706
|
+
# (e.g. tool-role messages echoed back through
|
|
707
|
+
# +after_message+) are skipped entirely.
|
|
708
|
+
#
|
|
709
|
+
# +msg+ is a +RubyLLM::Message+. Beyond +role+, +content+,
|
|
710
|
+
# +thinking+, and the +*_tokens+ accessors used here, it also
|
|
711
|
+
# carries +msg.tool_calls+ on assistant turns that requested
|
|
712
|
+
# one and +msg.raw+ for the unparsed provider payload.
|
|
713
|
+
#
|
|
714
|
+
# @param msg [RubyLLM::Message]
|
|
715
|
+
# @return [void]
|
|
716
|
+
def emit_after_message(msg)
|
|
717
|
+
return unless msg.role == :assistant
|
|
718
|
+
|
|
719
|
+
text = msg.thinking&.text
|
|
720
|
+
@listeners.emit(Event::Thinking.new(content: text)) if text && !text.empty?
|
|
721
|
+
|
|
722
|
+
content = msg.content
|
|
723
|
+
@listeners.emit(Event::Assistant.new(content: content)) if content.is_a?(String) && !content.empty?
|
|
724
|
+
|
|
725
|
+
@listeners.emit(Event::Tokens.new(
|
|
726
|
+
input: msg.input_tokens,
|
|
727
|
+
output: msg.output_tokens,
|
|
728
|
+
cached: msg.cached_tokens,
|
|
729
|
+
cache_creation: msg.cache_creation_tokens,
|
|
730
|
+
thinking: msg.thinking_tokens,
|
|
731
|
+
model_id: msg.model_id
|
|
732
|
+
))
|
|
733
|
+
end
|
|
734
|
+
|
|
735
|
+
# Normalize a +RubyLLM::Chunk+ from a streaming completion
|
|
736
|
+
# into zero, one, or two delta events
|
|
737
|
+
# ({Event::ThinkingDelta} / {Event::AssistantDelta}). Empty
|
|
738
|
+
# +thinking.text+ and empty +content+ are filtered here so
|
|
739
|
+
# listeners never see vacuous fragments. Tool-call deltas are
|
|
740
|
+
# intentionally skipped — see {#streaming_block}.
|
|
741
|
+
#
|
|
742
|
+
# +chunk+ is a +RubyLLM::Chunk+ (subclass of +RubyLLM::Message+),
|
|
743
|
+
# so the same +.thinking+ / +.content+ accessors used in
|
|
744
|
+
# {#emit_after_message} apply.
|
|
745
|
+
#
|
|
746
|
+
# @param chunk [RubyLLM::Chunk]
|
|
747
|
+
# @return [void]
|
|
748
|
+
def emit_chunk(chunk)
|
|
749
|
+
thinking = chunk.thinking&.text
|
|
750
|
+
@listeners.emit(Event::ThinkingDelta.new(content: thinking)) if thinking && !thinking.empty?
|
|
751
|
+
|
|
752
|
+
content = chunk.content
|
|
753
|
+
@listeners.emit(Event::AssistantDelta.new(content: content)) if content.is_a?(String) && !content.empty?
|
|
754
|
+
end
|
|
755
|
+
|
|
756
|
+
# Drain the interloper queue: for each pending item, append a
|
|
757
|
+
# +role: :user+ message to the chat history so the next
|
|
758
|
+
# round-trip sees it, emit an {Event::UserTurn} with
|
|
759
|
+
# +mid_loop: true+ to the listener stream so renderers see the
|
|
760
|
+
# injection, then run the per-turn {Extension#on_user_message}
|
|
761
|
+
# dispatch (so mid-loop injections are prefetched + recorded
|
|
762
|
+
# exactly like initial turns).
|
|
763
|
+
#
|
|
764
|
+
# The dispatch runs *after* the +:user+ append so any
|
|
765
|
+
# +<memory-context>+ it injects lands as a +:system+ message
|
|
766
|
+
# right behind the user turn it annotates — the same
|
|
767
|
+
# append-at-the-tail ordering {#run_loop} produces for initial
|
|
768
|
+
# turns.
|
|
769
|
+
#
|
|
770
|
+
# @return [void]
|
|
771
|
+
def drain_interloper
|
|
772
|
+
@interloper.drain!.each do |content|
|
|
773
|
+
@chat.add_message(role: :user, content: content)
|
|
774
|
+
@listeners.emit(Event::UserTurn.new(content: content, mid_loop: true))
|
|
775
|
+
dispatch_ext_on_user_message(content)
|
|
776
|
+
end
|
|
777
|
+
end
|
|
823
778
|
end
|
|
824
779
|
end
|