pikuri-core 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +67 -0
  3. data/lib/pikuri/agent/chat_transport.rb +41 -0
  4. data/lib/pikuri/agent/configurator.rb +270 -0
  5. data/lib/pikuri/agent/context_window_detector.rb +111 -0
  6. data/lib/pikuri/agent/control/cancellable.rb +128 -0
  7. data/lib/pikuri/agent/control/interloper.rb +167 -0
  8. data/lib/pikuri/agent/control/step_limit.rb +93 -0
  9. data/lib/pikuri/agent/control.rb +45 -0
  10. data/lib/pikuri/agent/event.rb +190 -0
  11. data/lib/pikuri/agent/extension.rb +82 -0
  12. data/lib/pikuri/agent/listener/in_memory_event_list.rb +34 -0
  13. data/lib/pikuri/agent/listener/rate_limited.rb +172 -0
  14. data/lib/pikuri/agent/listener/terminal.rb +264 -0
  15. data/lib/pikuri/agent/listener/token_log.rb +216 -0
  16. data/lib/pikuri/agent/listener.rb +54 -0
  17. data/lib/pikuri/agent/listener_list.rb +102 -0
  18. data/lib/pikuri/agent/synthesizer.rb +145 -0
  19. data/lib/pikuri/agent.rb +731 -0
  20. data/lib/pikuri/subprocess.rb +166 -0
  21. data/lib/pikuri/tool/calculator.rb +82 -0
  22. data/lib/pikuri/tool/fetch.rb +171 -0
  23. data/lib/pikuri/tool/parameters.rb +314 -0
  24. data/lib/pikuri/tool/scraper/fetch_error.rb +16 -0
  25. data/lib/pikuri/tool/scraper/html.rb +285 -0
  26. data/lib/pikuri/tool/scraper/pdf.rb +54 -0
  27. data/lib/pikuri/tool/scraper/simple.rb +183 -0
  28. data/lib/pikuri/tool/search/brave.rb +184 -0
  29. data/lib/pikuri/tool/search/duckduckgo.rb +196 -0
  30. data/lib/pikuri/tool/search/engines.rb +163 -0
  31. data/lib/pikuri/tool/search/exa.rb +217 -0
  32. data/lib/pikuri/tool/search/rate_limiter.rb +92 -0
  33. data/lib/pikuri/tool/search/result.rb +29 -0
  34. data/lib/pikuri/tool/sub_agent.rb +150 -0
  35. data/lib/pikuri/tool/web_scrape.rb +121 -0
  36. data/lib/pikuri/tool/web_search.rb +38 -0
  37. data/lib/pikuri/tool.rb +118 -0
  38. data/lib/pikuri/url_cache.rb +112 -0
  39. data/lib/pikuri/version.rb +10 -0
  40. data/lib/pikuri-core.rb +177 -0
  41. data/prompts/pikuri-chat.txt +15 -0
  42. metadata +251 -0
@@ -0,0 +1,731 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'ruby_llm'
4
+
5
+ module Pikuri
6
+ # Thin wrapper around +RubyLLM::Chat+: pikuri owns the *extension
7
+ # surface* (the event-stream listeners that consume normalized
8
+ # chat events, plus the controls that signal back into the loop)
9
+ # while ruby_llm owns the loop itself. The Thought / Tool-call /
10
+ # Observation iteration lives in +Chat#complete+; pikuri's job
11
+ # is wiring ruby_llm's three callbacks at construction time,
12
+ # emitting {Event} variants from each, and forwarding control
13
+ # signals (step-budget tick, cancellation check, mid-loop input
14
+ # drain) to the appropriate {Control}.
15
+ #
16
+ # == Roles in this file
17
+ #
18
+ # Two seams are visible:
19
+ #
20
+ # 1. **Listeners** ({ListenerList} + {Listener::Base} subclasses)
21
+ # — pure consumers of the event stream. The +Agent+ is the
22
+ # sole emitter; listeners never write back. New rendering or
23
+ # capture targets (a web sink, a structured log) are added
24
+ # here without touching {Agent}.
25
+ # 2. **Controls** ({Control::StepLimit}, {Control::Cancellable},
26
+ # {Control::Interloper}) — host-facing signal holders. The
27
+ # +Agent+ reads from them at well-defined boundaries:
28
+ # {Control::StepLimit#tick!} on every +before_tool_call+ (and
29
+ # {Control::StepLimit#reset!} at turn start),
30
+ # {Control::Cancellable#check!} on every +before_tool_call+
31
+ # (and {Control::Cancellable#reset!} at turn start),
32
+ # {Control::Interloper#drain!} on every +after_tool_result+.
33
+ #
34
+ # The two roles are named separately so "what fires when" is a
35
+ # single grep for +@listeners.emit+ in this file.
36
+ #
37
+ # == Step-exhaustion rescue
38
+ #
39
+ # If the +step_limit:+ {Control::StepLimit} trips during
40
+ # +Chat#ask+, {#run_loop} catches the +Exceeded+ exception,
41
+ # emits an {Event::FallbackNotice} to the listener stream, and
42
+ # hands off to {Synthesizer.run} on a fresh +RubyLLM::Chat+.
43
+ # The synth reuses the parent's listener stream via
44
+ # {ListenerList#for_sub_agent} (Terminal padded, TokenLog
45
+ # zeroed, recorder shared by reference) with a +name:+ derived
46
+ # from the parent's. The synth shares the parent's
47
+ # +cancellable+ so a user cancel during synthesis still works,
48
+ # and gets a fresh +step_limit+ at +max: 1+ (defensive — the
49
+ # synth has no tools and shouldn't trip it). The synth's
50
+ # answer becomes the value reported by
51
+ # {#last_assistant_content}, so callers (notably
52
+ # {Tool::SubAgent}) still get a usable reply.
53
+ #
54
+ # == Cancellation rescue
55
+ #
56
+ # If the +cancellable:+ {Control::Cancellable} trips during
57
+ # +Chat#ask+, {#run_loop} catches the +Cancelled+ exception,
58
+ # emits an {Event::Cancelled} to the listener stream, and
59
+ # re-raises. No synthesizer fallback runs: cancellation means
60
+ # the user asked the agent to drop everything, so salvaging a
61
+ # partial answer would be the wrong move. The caller (typically
62
+ # a REPL) rescues the re-raised exception and returns control
63
+ # to the user; because {#run_loop} calls
64
+ # {Control::Cancellable#reset!} at the start of every turn, the
65
+ # same agent instance can take a fresh turn immediately
66
+ # afterwards.
67
+ class Agent
68
+ LOGGER = Pikuri.logger_for('Agent')
69
+ private_constant :LOGGER
70
+
71
+ # Wire one +RubyLLM::Chat+ for pikuri's event stream and
72
+ # controls. Used by both {#initialize} (on the main chat) and
73
+ # {Synthesizer.run} (on the synth chat) so the two share one
74
+ # source of truth for "which callback emits which event."
75
+ #
76
+ # Handles the three message-level registered callbacks
77
+ # (+after_message+, +before_tool_call+, +after_tool_result+);
78
+ # the per-chunk streaming callback is separate because
79
+ # ruby_llm takes it as a block to +Chat#ask+ rather than a
80
+ # registered hook — see {.streaming_block}.
81
+ #
82
+ # @param chat [RubyLLM::Chat] the chat instance to wire
83
+ # @param listeners [ListenerList] the listener stream events
84
+ # flow into
85
+ # @param step_limit [Control::StepLimit, nil] when set,
86
+ # {Control::StepLimit#tick!} is poked on every
87
+ # +before_tool_call+ (and raises {Control::StepLimit::Exceeded}
88
+ # when over budget)
89
+ # @param cancellable [Control::Cancellable, nil] when set,
90
+ # {Control::Cancellable#check!} is poked on every
91
+ # +before_tool_call+ (and raises
92
+ # {Control::Cancellable::Cancelled} when the flag is up)
93
+ # @param interloper [Control::Interloper, nil] when set, the
94
+ # queue is drained on every +after_tool_result+, each item
95
+ # appended as a +role: :user+ message and emitted as
96
+ # {Event::UserTurn} with +mid_loop: true+
97
+ # @return [void]
98
def self.wire_chat(chat, listeners:, step_limit: nil, cancellable: nil, interloper: nil)
  # Completed messages: normalization into Event variants is delegated
  # to emit_after_message so this method stays a pure wiring table.
  chat.after_message { |message| emit_after_message(message, listeners) }

  # Tool requested: announce it to the listener stream first, then
  # charge the step budget, then honor a pending cancellation. Either
  # control may raise, in which case the tool call is never executed.
  chat.before_tool_call do |call|
    announcement = Event::ToolCall.new(name: call.name, arguments: call.arguments)
    listeners.emit(announcement)
    step_limit&.tick!
    cancellable&.check!
  end

  # Tool finished: surface the observation, then splice in any user
  # input that was queued while the tool was running.
  chat.after_tool_result do |outcome|
    listeners.emit(Event::ToolResult.new(content: outcome))
    if interloper
      drain_interloper(interloper, chat, listeners)
    end
  end
end
112
+
113
+ # Build the per-chunk streaming block passed to +Chat#ask+.
114
+ # Each invocation of the returned proc converts one
115
+ # +RubyLLM::Chunk+ into zero, one, or two delta events
116
+ # ({Event::ThinkingDelta} / {Event::AssistantDelta}) on
117
+ # +listeners+. Tool-call chunks are intentionally ignored —
118
+ # partial JSON has no useful rendering; the assembled
119
+ # +tool_calls+ surface through {Event::ToolCall} once the
120
+ # message completes.
121
+ #
122
+ # Lives parallel to {.wire_chat} (instead of being folded into
123
+ # it) because +Chat#ask+ takes the streaming block as an
124
+ # argument rather than a registered callback, so both
125
+ # {#run_loop} and {Synthesizer.run} pass it inline at the call
126
+ # site with +&Agent.streaming_block(listeners: ..., cancellable: ...)+.
127
+ #
128
+ # == Cancellation polling
129
+ #
130
+ # When +cancellable+ is non-nil, {Control::Cancellable#check!}
131
+ # fires *before* each chunk's emit. The +before_tool_call+
132
+ # wiring in {.wire_chat} only fires when the model requests a
133
+ # tool, which leaves a no-tool turn (e.g. a plain greeting)
134
+ # with zero cancellation points — Ctrl+C trips the flag but
135
+ # nothing reads it. Polling on every streamed chunk closes
136
+ # that gap: an in-flight +Cancellable#check!+ raises on the
137
+ # next chunk delivered after the flag flips, the exception
138
+ # propagates out through ruby_llm's streaming path
139
+ # (+Chat#ask+ doesn't rescue), and {#run_loop} catches it,
140
+ # emits {Event::Cancelled}, and re-raises. The pre-emit
141
+ # ordering is deliberate: a chunk that arrives after a cancel
142
+ # request shouldn't render — the user has said stop.
143
+ #
144
+ # @param listeners [ListenerList] the listener stream chunk
145
+ # events flow into
146
+ # @param cancellable [Control::Cancellable, nil] when non-nil,
147
+ # polled on every chunk so a flag flipped mid-stream raises
148
+ # {Control::Cancellable::Cancelled} on the very next chunk
149
+ # @return [Proc] a +-> (chunk) { ... }+ proc suitable for
150
+ # passing to +Chat#ask+ with +&+
151
def self.streaming_block(listeners:, cancellable: nil)
  # Returned as a lambda so callers can hand it to Chat#ask with `&`.
  lambda do |chunk|
    # Poll the cancel flag before emitting: a chunk that arrives after
    # the user asked to stop must not reach the listeners.
    cancellable&.check!
    emit_chunk(chunk, listeners)
  end
end
157
+
158
+ # Normalize a +RubyLLM::Chat+ +after_message+ payload into
159
+ # zero, one, or two {Event} variants (+Thinking+ and/or
160
+ # +Assistant+) plus one {Event::Tokens} for the usage block.
161
+ # Empty thinking / empty content are filtered here so
162
+ # listeners never see vacuous events. Non-assistant roles
163
+ # (e.g. tool-role messages echoed back through
164
+ # +after_message+) are skipped entirely.
165
+ #
166
+ # +msg+ is a +RubyLLM::Message+. Beyond +role+, +content+,
167
+ # +thinking+, and the +*_tokens+ accessors used here, it also
168
+ # carries +msg.tool_calls+ on assistant turns that requested
169
+ # one and +msg.raw+ for the unparsed provider payload.
170
+ #
171
+ # @param msg [RubyLLM::Message]
172
+ # @param listeners [ListenerList]
173
+ # @return [void]
174
def self.emit_after_message(msg, listeners)
  # Only assistant turns produce events; any other role is skipped.
  return if msg.role != :assistant

  # Reasoning trace, when present and non-empty.
  reasoning = msg.thinking&.text
  if reasoning && !reasoning.empty?
    listeners.emit(Event::Thinking.new(content: reasoning))
  end

  # Visible reply; non-String content (or an empty String) is dropped.
  reply = msg.content
  if reply.is_a?(String) && !reply.empty?
    listeners.emit(Event::Assistant.new(content: reply))
  end

  # Usage accounting is emitted for every assistant message, even one
  # that carried neither thinking nor text.
  usage = {
    input: msg.input_tokens,
    output: msg.output_tokens,
    cached: msg.cached_tokens,
    cache_creation: msg.cache_creation_tokens,
    thinking: msg.thinking_tokens,
    model_id: msg.model_id
  }
  listeners.emit(Event::Tokens.new(**usage))
end
192
+ private_class_method :emit_after_message
193
+
194
+ # Normalize a +RubyLLM::Chunk+ from a streaming +Chat#ask+
195
+ # into zero, one, or two delta events
196
+ # ({Event::ThinkingDelta} / {Event::AssistantDelta}). Empty
197
+ # +thinking.text+ and empty +content+ are filtered here so
198
+ # listeners never see vacuous fragments. Tool-call deltas are
199
+ # intentionally skipped — see {.streaming_block}.
200
+ #
201
+ # +chunk+ is a +RubyLLM::Chunk+ (subclass of +RubyLLM::Message+),
202
+ # so the same +.thinking+ / +.content+ accessors used in
203
+ # {.emit_after_message} apply.
204
+ #
205
+ # @param chunk [RubyLLM::Chunk]
206
+ # @param listeners [ListenerList]
207
+ # @return [void]
208
def self.emit_chunk(chunk, listeners)
  # Reasoning fragment first, mirroring the message-level ordering.
  reasoning = chunk.thinking&.text
  if reasoning && !reasoning.empty?
    listeners.emit(Event::ThinkingDelta.new(content: reasoning))
  end

  # Text fragment; anything that is not a non-empty String is dropped,
  # so tool-call chunks produce no event here.
  fragment = chunk.content
  return unless fragment.is_a?(String) && !fragment.empty?

  listeners.emit(Event::AssistantDelta.new(content: fragment))
end
215
+ private_class_method :emit_chunk
216
+
217
+ # Drain the interloper queue: for each pending item, append a
218
+ # +role: :user+ message to the chat history so the next
219
+ # round-trip sees it, then emit an {Event::UserTurn} with
220
+ # +mid_loop: true+ to the listener stream so renderers see
221
+ # the injection.
222
+ #
223
+ # @param interloper [Control::Interloper]
224
+ # @param chat [RubyLLM::Chat]
225
+ # @param listeners [ListenerList]
226
+ # @return [void]
227
def self.drain_interloper(interloper, chat, listeners)
  pending = interloper.drain!
  pending.each do |text|
    # History first, so the next round-trip to the model includes the
    # injected turn; then tell the listeners it happened mid-loop.
    chat.add_message(role: :user, content: text)
    injected = Event::UserTurn.new(content: text, mid_loop: true)
    listeners.emit(injected)
  end
end
233
+ private_class_method :drain_interloper
234
+
235
+ # One-shot inference. Builds a fresh +RubyLLM::Chat+ with no
236
+ # tools, no MCP, no listeners, no step budget, asks +prompt+ as
237
+ # the single user turn, and returns the assistant's reply as a
238
+ # plain String. Lives parallel to {#initialize} / {#run_loop}
239
+ # because the use case (e.g. summarizing an MCP server's tool
240
+ # set into a short description block before any agent turn
241
+ # runs) is genuinely one-shot — there is no loop, no tool
242
+ # iteration, no listener stream.
243
+ #
244
+ # +prompt+ is sent as the user message. For a one-shot call
245
+ # there is no behavioral difference between the system slot
246
+ # and the user slot, so we use one parameter; pack any
247
+ # "instructions + data" framing into +prompt+ directly.
248
+ #
249
+ # == Cancellation
250
+ #
251
+ # {Control::Cancellable#check!} fires once, before the call,
252
+ # so a flag that is already up when the request would start
253
+ # raises {Control::Cancellable::Cancelled} promptly. The
254
+ # in-flight HTTP call itself is *not* interrupted — same
255
+ # "gentle cancel" semantic the main loop offers (see
256
+ # {Control::Cancellable}'s class header). A flag flipped
257
+ # while the request is running is not re-checked here, so
258
+ # the reply is still returned to the caller.
259
+ #
260
+ # == Failure
261
+ #
262
+ # Errors from the provider (HTTP failure, malformed response,
263
+ # +RubyLLM+ raising) propagate to the caller verbatim — there
264
+ # is no recovery layer here. Callers that want "fail soft on
265
+ # synthesis errors" (e.g. {Mcp::Servers}) rescue at their level
266
+ # and fall back to a default; this method stays loud.
267
+ #
268
+ # @param transport [ChatTransport] same model-resolution
269
+ # triple {#initialize} uses; if +model+ is +nil+, falls
270
+ # back to +RubyLLM.config.default_model+
271
+ # @param prompt [String] the prompt sent as the single user
272
+ # turn; must be non-blank
273
+ # @param cancellable [Control::Cancellable, nil] when set,
274
+ # checked before the call so a flag flipped right
275
+ # around the request raises {Control::Cancellable::Cancelled}
276
+ # @return [String] the assistant's reply content
277
+ # @raise [ArgumentError] when +prompt+ is +nil+, empty, or
278
+ # whitespace-only
279
+ # @raise [Control::Cancellable::Cancelled] when the
280
+ # +cancellable+ flag was tripped at the pre-call check
281
def self.think(transport:, prompt:, cancellable: nil)
  blank = prompt.nil? || prompt.to_s.strip.empty?
  raise ArgumentError, "prompt must not be blank, got #{prompt.inspect}" if blank

  # Fill in the configured default model when the caller left it unset.
  resolved = transport.model ? transport : transport.with(model: RubyLLM.config.default_model)

  # Single cancellation point: checked before the chat is even built.
  cancellable&.check!

  chat = RubyLLM.chat(**resolved.to_h)
  chat.ask(prompt)

  # Newest assistant message wins; with none present the result is "".
  reply = chat.messages.reverse.find { |message| message.role == :assistant }
  reply&.content.to_s
end
292
+
293
+ # @param transport [ChatTransport] the model-resolution triple
294
+ # (+model+ / +provider+ / +assume_model_exists+) forwarded
295
+ # to +RubyLLM.chat+. Bundled into one value object so every
296
+ # construction site — this constructor and the synthesizer
297
+ # rescue below — can forward all three with one assignment
298
+ # instead of three kwargs (where dropping one would silently
299
+ # route the chat elsewhere or raise
300
+ # +RubyLLM::ModelNotFoundError+). If +transport.model+ is
301
+ # +nil+, it's filled in from +RubyLLM.config.default_model+.
302
+ # @param system_prompt [String] system message prepended to
303
+ # the chat. Extensions append their advertisement blocks
304
+ # (e.g. +<available_skills>+, +<available_mcps>+) onto this
305
+ # base via {Configurator#append_system_prompt} during the
306
+ # block.
307
+ # @param step_limit [Control::StepLimit, nil] step budget
308
+ # control. When set, {Control::StepLimit#tick!} fires on
309
+ # every +before_tool_call+ and {Control::StepLimit#reset!}
310
+ # at the start of each turn. +nil+ means "no step budget"
311
+ # (the agent can loop indefinitely).
312
+ # @param cancellable [Control::Cancellable, nil] cancellation
313
+ # control. When set, {Control::Cancellable#check!} fires on
314
+ # every +before_tool_call+ and
315
+ # {Control::Cancellable#reset!} at the start of each turn.
316
+ # +nil+ means "not cancellable" (the host has no way to
317
+ # stop a running turn except by killing the process).
318
+ # @param interloper [Control::Interloper, nil] mid-loop
319
+ # user-input queue. When set, the queue is drained at
320
+ # every +after_tool_result+ and each item becomes a
321
+ # {Event::UserTurn} with +mid_loop: true+. +nil+ means
322
+ # "no mid-loop injection" (the bundled CLIs' default).
323
+ # @param context_window [Integer, nil] explicit override for
324
+ # the model's context-window cap. When set, it wins over
325
+ # ruby_llm's reported value and the llama.cpp probe — see
326
+ # {ContextWindowDetector} for precedence. Resolved cap is
327
+ # emitted as an {Event::ContextCap} immediately after
328
+ # construction.
329
+ # @param llama_probe_url [String, nil] llama.cpp +/props+ URL
330
+ # used as the third detection source. Only consulted when
331
+ # neither +context_window+ nor ruby_llm's reported value is
332
+ # set. Typically derived by +bin/pikuri-chat+ from its
333
+ # configured +openai_api_base+; leave +nil+ when the
334
+ # configured server is anything other than llama.cpp.
335
+ # @param name [String] identifier for this agent. Empty for
336
+ # the main agent; sub-agents get monotonic hierarchical
337
+ # names like +"sub_agent 0"+, +"sub_agent 1"+,
338
+ # +"sub_agent 0_0"+, ... generated by {Tool::SubAgent} from
339
+ # the parent's name + a per-parent counter. Forwarded to
340
+ # listeners through {ListenerList#for_sub_agent} so name-
341
+ # aware ones (notably {Listener::TokenLog}) can tag their
342
+ # output.
343
+ # @param streaming [Boolean] opt into chunk-level streaming.
344
+ # When +true+, {#run_loop} passes the block returned by
345
+ # {.streaming_block} to +Chat#ask+, and ruby_llm requests
346
+ # SSE responses from the provider — chunks are normalized
347
+ # into {Event::ThinkingDelta} / {Event::AssistantDelta} on
348
+ # the listener stream as they arrive. When +false+ (the
349
+ # default), +Chat#ask+ runs in single-shot mode and only
350
+ # the message-level {Event::Thinking} / {Event::Assistant}
351
+ # bookends fire from +after_message+. Read by
352
+ # {Tool::SubAgent} so spawned sub-agents inherit the same
353
+ # mode without an extra kwarg.
354
+ # @yield [Configurator] yields a {Configurator} that collects
355
+ # tools (via {Configurator#add_tool} / {Configurator#add_tools}),
356
+ # listeners (via {Configurator#add_listener} /
357
+ # {Configurator#add_listeners}), system-prompt snippets (via
358
+ # {Configurator#append_system_prompt}), extension instances
359
+ # (via {Configurator#add_extension} — which fires +configure+
360
+ # immediately), close handlers (via {Configurator#on_close}),
361
+ # and an optional +sub_agent+ tool (via
362
+ # {Configurator#allow_sub_agent}). The Configurator is the
363
+ # *only* path for adding any of these — there are no parallel
364
+ # ctor kwargs. The block is optional; an agent constructed
365
+ # without one has no tools, no listeners, no extensions.
366
+ # @return [Agent]
367
def initialize(transport:, system_prompt:,
               step_limit: nil, cancellable: nil, interloper: nil,
               context_window: nil, llama_probe_url: nil, name: '',
               streaming: false,
               &block)
  # Resolve the model once, up front, so every later RubyLLM.chat call
  # made from @transport uses the same resolved value.
  transport = transport.with(model: RubyLLM.config.default_model) unless transport.model
  @transport = transport

  @system_prompt = system_prompt
  @step_limit = step_limit
  @cancellable = cancellable
  @interloper = interloper
  @name = name
  @streaming = streaming
  @closed = false
  @synth_answer = nil
  @on_close_handlers = []

  # The Configurator is the single funnel for everything the caller's
  # block contributes: tools, listeners, prompt snippets, extensions,
  # close handlers and the optional sub-agent request.
  setup = Configurator.new(
    transport: @transport,
    system_prompt_base: system_prompt,
    name: @name,
    streaming: @streaming,
    step_limit: @step_limit,
    cancellable: @cancellable,
    interloper: @interloper
  )
  block.call(setup) if block

  # Copy what the block collected onto the agent.
  @tools = setup.tools.dup
  @listeners = ListenerList.new(setup.listeners)
  @system_prompt = setup.system_prompt_additions.reduce(@system_prompt) do |prompt, snippet|
    "#{prompt}\n\n#{snippet}"
  end
  @on_close_handlers.concat(setup.on_close_handlers)
  @extensions = setup.extensions.dup

  # Build the underlying chat with the augmented prompt and tool set.
  @chat = RubyLLM.chat(**@transport.to_h)
  @chat.with_instructions(@system_prompt)
  @tools.each { |tool| @chat.with_tool(tool.to_ruby_llm_tool) }

  detector = ContextWindowDetector.new(
    override: context_window,
    ruby_llm_reported: @chat.model.context_window,
    llama_probe_url: llama_probe_url
  )
  @context_window_cap = detector.detect

  self.class.wire_chat(
    @chat,
    listeners: @listeners,
    step_limit: @step_limit,
    cancellable: @cancellable,
    interloper: @interloper
  )

  # Announce the cap before any Tokens event can arrive, so listeners
  # that track usage against it have the value in hand.
  @listeners.emit(Event::ContextCap.new(cap: @context_window_cap))

  # The sub-agent tool is built only now: @tools and the context cap
  # are both final at this point, and the tool is appended to @tools
  # and the chat only after it has been constructed from this agent.
  request = setup.sub_agent_request
  if request
    if @tools.any?(Tool::SubAgent)
      raise 'Tool::SubAgent must not be added via c.add_tool when c.allow_sub_agent ' \
            'is used; Agent auto-registers it from the Configurator request.'
    end

    sub_tool = Tool::SubAgent.new(self, max_steps: request.max_steps)
    @tools << sub_tool
    @chat.with_tool(sub_tool.to_ruby_llm_tool)
  end

  # Bind sweep: the chat is fully wired, so each extension may now
  # attach its per-agent state.
  @extensions.each { |extension| extension.bind(self) }

  # Safety net for hosts that never call #close. The block captures
  # self, so the agent stays reachable until the process exits.
  at_exit { close }
end
469
+
470
+ # @return [RubyLLM::Chat] underlying chat; the extension seam
471
+ attr_reader :chat
472
+
473
+ # @return [ChatTransport] the resolved transport bundle this
474
+ # agent was constructed with — same model id / provider /
475
+ # assume-model-exists flag passed to every +RubyLLM.chat+
476
+ # call originating from this agent (the main chat, the
477
+ # synthesizer rescue, the sub-agent tool). Read by
478
+ # {Tool::SubAgent} so spawned sub-agents reuse the same
479
+ # transport.
480
+ attr_reader :transport
481
+
482
+ # @return [Array<Tool>] this agent's tool list in declaration
483
+ # order. Snapshotted by {Tool::SubAgent} so spawned
484
+ # sub-agents inherit the parent's tools (minus the
485
+ # sub-agent tool itself, which {#initialize} appends
486
+ # to +@tools+ only after the snapshot has been taken —
487
+ # recursion guard).
488
+ attr_reader :tools
489
+
490
+ # @return [String] resolved model id from {#transport}.
491
+ # Convenience delegator for callers that don't need the
492
+ # full transport bundle.
493
def model
  # Thin delegator: the transport bundle owns the resolved model id.
  bundle = @transport
  bundle.model
end
496
+
497
+ # @return [String] system prompt actually sent to the chat —
498
+ # equal to the constructor's +system_prompt:+ argument plus
499
+ # any snippets appended by extensions during
500
+ # {Configurator#append_system_prompt} (Skills'
501
+ # +<available_skills>+, MCP's +<available_mcps>+, ...).
502
+ # {Tool::SubAgent} forwards this already-augmented value to
503
+ # spawned sub-agents so they see the same advertisements
504
+ # without re-running extension configure.
505
+ attr_reader :system_prompt
506
+
507
+ # @return [ListenerList] the listener list attached to this
508
+ # agent's chat
509
+ attr_reader :listeners
510
+
511
+ # @return [Control::StepLimit, nil] the step-budget control
512
+ # this agent was constructed with, or +nil+ when none.
513
+ # Read by {Tool::SubAgent} so spawned sub-agents derive
514
+ # their own.
515
+ attr_reader :step_limit
516
+
517
+ # @return [Control::Cancellable, nil] the cancellation
518
+ # control this agent was constructed with, or +nil+ when
519
+ # none. Read by {Tool::SubAgent} so spawned sub-agents
520
+ # share the same instance.
521
+ attr_reader :cancellable
522
+
523
+ # @return [Control::Interloper, nil] the mid-loop user-input
524
+ # control this agent was constructed with, or +nil+ when
525
+ # none. Not propagated to sub-agents — see
526
+ # {Control::Interloper#for_sub_agent}.
527
+ attr_reader :interloper
528
+
529
+ # @return [String] this agent's identifier — empty for the
530
+ # main agent; for sub-agents, the hierarchical id assigned
531
+ # by {Tool::SubAgent} (e.g. +"sub_agent 0"+,
532
+ # +"sub_agent 1"+, +"sub_agent 0_0"+). Read by the
533
+ # sub-agent tool so spawned sub-agents prefix their own
534
+ # names with this one, and propagated to listeners via
535
+ # {ListenerList#for_sub_agent} so name-aware ones can tag
536
+ # output.
537
+ attr_reader :name
538
+
539
+ # @return [Boolean] +true+ when this agent opted into
540
+ # chunk-level streaming (see the +streaming:+ kwarg on
541
+ # {#initialize}); +false+ otherwise. Read by
542
+ # {Tool::SubAgent} so spawned sub-agents inherit the same
543
+ # mode.
544
+ attr_reader :streaming
545
+
546
+ # @return [Array<Extension>] extension instances bound to this
547
+ # agent — added via {Configurator#add_extension} (new — runs
548
+ # +configure+ now and binds later) or {Configurator#inherit_extensions}
549
+ # (sub-agent inheritance — skips +configure+, just binds), both
550
+ # inside the +Agent.new+ block. Read by {Tool::SubAgent} so
551
+ # spawned sub-agents inherit the parent's extension list and
552
+ # re-bind them via the bind sweep.
553
+ attr_reader :extensions
554
+
555
+ # @return [Integer, nil] context-window cap resolved by
556
+ # {ContextWindowDetector} at construction time. +nil+ when
557
+ # no source produced a value (custom local model with no
558
+ # override and no reachable llama.cpp +/props+). Read by
559
+ # {Tool::SubAgent} so spawned sub-agents inherit the same
560
+ # cap without re-probing.
561
+ attr_reader :context_window_cap
562
+
563
+ # Final assistant message content for the most recent
564
+ # {#run_loop}. When the synthesizer rescue fired, returns its
565
+ # answer; otherwise walks the underlying chat's history.
566
+ # Returns +nil+ if neither source has produced an assistant
567
+ # turn yet.
568
+ #
569
+ # @return [String, nil]
570
def last_assistant_content
  # A synthesizer answer, when one was captured for the latest turn,
  # takes precedence over the chat history. Otherwise scan the history
  # newest-first for an assistant message (nil when there is none).
  @synth_answer || @chat.messages.reverse_each.find { |message| message.role == :assistant }&.content
end
576
+
577
+ # Run the agent loop for a single user turn. Emits an
578
+ # {Event::UserTurn} with +mid_loop: false+, resets the
579
+ # step-budget and cancellation controls (so a stale state
580
+ # from a prior turn doesn't poison this one), and forwards
581
+ # +user_message+ to {#chat} via +ask+. Returns nil; rendering
582
+ # and any other observable output is the listeners'
583
+ # responsibility.
584
+ #
585
+ # If the +step_limit+ control trips during +ask+, the rescue
586
+ # branch emits an {Event::FallbackNotice} and runs
587
+ # {Synthesizer.run} on a fresh +RubyLLM::Chat+. The synth's
588
+ # answer is captured for {#last_assistant_content}; the
589
+ # exception does not bubble out.
590
+ #
591
+ # If the +cancellable+ control trips during +ask+, the rescue
592
+ # branch emits an {Event::Cancelled} and re-raises the
593
+ # +Cancelled+ exception. No synthesizer fallback runs — see
594
+ # the "Cancellation rescue" section in the class header.
595
+ #
596
+ # Subsequent calls keep building on the same chat history, so
597
+ # the model sees full multi-turn context.
598
+ #
599
+ # @param user_message [String] the user's request for this
600
+ # turn; must not be +nil+, empty, or whitespace-only
601
+ # @raise [ArgumentError] if +user_message+ is +nil+, empty,
602
+ # or contains only whitespace — an empty turn would poison
603
+ # the chat history and burn a step budget on nothing
604
+ # @raise [Control::Cancellable::Cancelled] if the registered
605
+ # {Control::Cancellable} was triggered during the turn;
606
+ # the listener stream sees an {Event::Cancelled} first
607
+ # @return [nil]
608
def run_loop(user_message:)
  if user_message.nil? || user_message.to_s.strip.empty?
    raise ArgumentError, "user_message must not be blank, got #{user_message.inspect}"
  end

  # Fresh turn: forget any earlier synthesizer answer, announce the
  # user turn, and clear both controls so stale state cannot leak in.
  @synth_answer = nil
  @listeners.emit(Event::UserTurn.new(content: user_message, mid_loop: false))
  @step_limit&.reset!
  @cancellable&.reset!

  # Passing `&nil` is the same as passing no block, so the non-streaming
  # path still calls Chat#ask without one.
  on_chunk = (self.class.streaming_block(listeners: @listeners, cancellable: @cancellable) if @streaming)
  @chat.ask(user_message, &on_chunk)
  nil
rescue Control::Cancellable::Cancelled
  # Cancellation is surfaced to listeners and then handed back to the
  # caller; no fallback answer is synthesized.
  @listeners.emit(Event::Cancelled.new)
  raise
rescue Control::StepLimit::Exceeded => e
  notice = "agent exhausted #{e.max_steps} steps; synthesizing answer from gathered evidence"
  @listeners.emit(Event::FallbackNotice.new(reason: notice))

  # The rescue pass runs on a fresh chat under a derived name, so its
  # output can be told apart from the original loop's.
  rescue_name = @name.empty? ? 'synthesizer' : "#{@name}_synthesizer"
  rescue_chat = RubyLLM.chat(**@transport.to_h)
  # Defensive one-step budget for the rescue pass, created only when
  # this agent itself runs under a step limit.
  rescue_budget = @step_limit && Control::StepLimit.new(max: 1)

  synth_args = {
    chat: rescue_chat,
    parent_messages: @chat.messages,
    user_message: user_message,
    listeners: @listeners.for_sub_agent(name: rescue_name),
    step_limit: rescue_budget,
    cancellable: @cancellable,
    streaming: @streaming
  }
  @synth_answer = Synthesizer.run(**synth_args)
  nil
end
657
+
658
+ # Release agent-owned resources. Fires every handler registered
659
+ # via {Configurator#on_close} (during the +Agent.new+ block) and
660
+ # {#on_close} (during {Extension#bind} or any post-construction
661
+ # call), in LIFO order — matches Ruby +ensure+-block semantics
662
+ # so handlers registered later (which may depend on handlers
663
+ # registered earlier) tear down first. Each handler runs inside
664
+ # its own +rescue+; an exception is logged via
665
+ # +Pikuri.logger_for+ but doesn't abort the rest. Idempotent —
666
+ # subsequent calls are no-ops.
667
+ #
668
+ # @return [void]
669
def close
  # Idempotent: a second call finds the flag set and does nothing.
  return if @closed

  @closed = true

  # Newest registration first (LIFO). Each teardown is isolated so one
  # failing handler is logged and the remaining ones still run.
  @on_close_handlers.reverse_each do |teardown|
    begin
      teardown.call
    rescue StandardError => error
      LOGGER.warn("on_close handler raised #{error.class}: #{error.message}")
    end
  end
end
679
+
680
+ # Register a handler called by {#close}. Symmetric to
681
+ # {Configurator#on_close} — same LIFO + per-handler-rescue +
682
+ # idempotent semantics — but available post-construction, so
683
+ # an {Extension}'s +bind(agent)+ can install per-agent cleanup
684
+ # that's keyed to this specific agent rather than the parent.
685
+ #
686
+ # @yield called with no arguments at close time
687
+ # @return [void]
688
def on_close(&blk)
  # A missing block leaves +blk+ nil; registering nothing is a caller bug.
  raise ArgumentError, 'on_close requires a block' if blk.nil?

  # Appended in registration order; #close walks the list in reverse.
  @on_close_handlers.push(blk)
  nil
end
694
+
695
+ # Register a raw +RubyLLM::Tool+ subclass on this agent's
696
+ # underlying chat, bypassing the {Pikuri::Tool} strict-validation
697
+ # seam. Sole intended caller: {Mcp::Servers::Connect}, which uses
698
+ # this to lazy-add MCP-exposed tools after the LLM invokes
699
+ # +mcp_connect+ in a turn.
700
+ #
701
+ # The +internal_+ prefix is the warning: native pikuri tools
702
+ # should go through {Pikuri::Tool} so they get
703
+ # {Tool::Parameters} validation and the LLM-actionable
704
+ # +"Error: ..."+ contract. MCP tools deliberately don't — see
705
+ # IDEAS.md §"v1 implementation shape" / "MCP tools bypass
706
+ # +Pikuri::Tool+ entirely."
707
+ #
708
+ # The added tool does NOT enter +@tools+, only +@chat+'s tool
709
+ # list. {Tool::SubAgent} therefore cannot snapshot it (which is
710
+ # the whole point — activation is strictly per-agent, see
711
+ # IDEAS.md §"Per-agent activation, no propagation").
712
+ #
713
+ # @param ruby_llm_tool [Class] subclass of +RubyLLM::Tool+
714
+ # @return [void]
715
def internal_add_tool(ruby_llm_tool)
  # Goes straight to the underlying chat; @tools is intentionally left
  # untouched, so the tool is visible to this agent's chat only.
  chat = @chat
  chat.with_tool(ruby_llm_tool)
end
718
+
719
+ # Short, single-line config dump suitable for a startup
720
+ # banner or a debug print.
721
+ #
722
+ # @example
723
+ # agent.to_s
724
+ # # => "Agent(model=qwen3-35b, tools=4, listeners=[Terminal])"
725
+ #
726
+ # @return [String]
727
def to_s
  # One "key=value" entry per config facet, rendered on a single line.
  fields = [
    "model=#{model}",
    "tools=#{@tools.size}",
    "listeners=#{@listeners}"
  ]
  "Agent(#{fields.join(', ')})"
end
730
+ end
731
+ end