pikuri-core 0.0.5 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +5 -3
- data/lib/pikuri/agent/chat_transport.rb +135 -11
- data/lib/pikuri/agent/configurator.rb +4 -4
- data/lib/pikuri/agent/context_window_detector.rb +103 -52
- data/lib/pikuri/agent/control/step_limit.rb +39 -7
- data/lib/pikuri/agent/event.rb +43 -16
- data/lib/pikuri/agent/extension.rb +31 -17
- data/lib/pikuri/agent/extension_context.rb +147 -0
- data/lib/pikuri/agent/listener/terminal.rb +30 -37
- data/lib/pikuri/agent/listener/token_log.rb +60 -13
- data/lib/pikuri/agent/listener.rb +12 -5
- data/lib/pikuri/agent/listener_list.rb +7 -17
- data/lib/pikuri/agent/synthesizer.rb +93 -67
- data/lib/pikuri/agent.rb +358 -403
- data/lib/pikuri/extractor/html.rb +303 -0
- data/lib/pikuri/extractor/passthrough.rb +64 -0
- data/lib/pikuri/extractor.rb +314 -0
- data/lib/pikuri/file_type.rb +74 -266
- data/lib/pikuri/sanitizer.rb +179 -0
- data/lib/pikuri/subprocess.rb +73 -2
- data/lib/pikuri/tool/calculator.rb +213 -41
- data/lib/pikuri/tool/fetch.rb +10 -9
- data/lib/pikuri/tool/parameters.rb +65 -2
- data/lib/pikuri/tool/scraper.rb +186 -0
- data/lib/pikuri/tool/search/brave.rb +32 -18
- data/lib/pikuri/tool/search/duckduckgo.rb +18 -7
- data/lib/pikuri/tool/search/engines.rb +72 -49
- data/lib/pikuri/tool/search/exa.rb +34 -22
- data/lib/pikuri/tool/web_scrape.rb +5 -5
- data/lib/pikuri/tool/web_search.rb +45 -26
- data/lib/pikuri/version.rb +1 -1
- data/lib/pikuri-core.rb +11 -10
- metadata +9 -66
- data/lib/pikuri/tool/scraper/fetch_error.rb +0 -16
- data/lib/pikuri/tool/scraper/html.rb +0 -285
- data/lib/pikuri/tool/scraper/pdf.rb +0 -54
- data/lib/pikuri/tool/scraper/simple.rb +0 -183
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 3b034cfd9a32f43694e474444155ea4e18c0c728449f50f33755a789b4879ab6
|
|
4
|
+
data.tar.gz: 0fdff7d50e07f52f9063b240db87e67d8bb93ac85a09cbb56b3caace3232478c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 25ac79d6951ce4574d6400742bec3603306c72ca334786a13c34c3a3a5ab48de2735be5b5f51b1b6a25aa647c8f3d36da2f53d3bc97072ea276528464d6d4e16
|
|
7
|
+
data.tar.gz: 60100a4ab334e08066b29057d0eea4003a6c0db67947cf17a81484308dcd305ef08869b740965d6e81be246fd3ca964ad5afb2ca668bc101eaa867ed63c1bc02
|
data/README.md
CHANGED
|
@@ -12,8 +12,10 @@ AI-assistant toolkit:
|
|
|
12
12
|
token accounting, and structured capture.
|
|
13
13
|
- Controls (`StepLimit`, `Cancellable`, `Interloper`) for budget
|
|
14
14
|
enforcement + cancellation.
|
|
15
|
-
-
|
|
16
|
-
`
|
|
15
|
+
- Three stateless bundled tools (`CALCULATOR`, `WEB_SCRAPE`,
|
|
16
|
+
`FETCH`) plus a host-configured web-search tool built via
|
|
17
|
+
`Pikuri::Tool::WebSearch.build` (provider keys passed in, never
|
|
18
|
+
read from the environment).
|
|
17
19
|
- A demo binary, `bin/pikuri-chat`.
|
|
18
20
|
|
|
19
21
|
Extensions (skills, MCP, workspace, coding stack, named-agent
|
|
@@ -57,7 +59,7 @@ agent = Pikuri::Agent.new(
|
|
|
57
59
|
step_limit: Pikuri::Agent::Control::StepLimit.new(max: 20)
|
|
58
60
|
) do |c|
|
|
59
61
|
c.add_tool Pikuri::Tool::CALCULATOR
|
|
60
|
-
c.add_tool Pikuri::Tool::
|
|
62
|
+
c.add_tool Pikuri::Tool::WebSearch.build
|
|
61
63
|
c.add_listener Pikuri::Agent::Listener::Terminal.new
|
|
62
64
|
end
|
|
63
65
|
agent.run_loop(user_message: 'What is 17 * 23?')
|
|
@@ -2,18 +2,32 @@
|
|
|
2
2
|
|
|
3
3
|
module Pikuri
|
|
4
4
|
class Agent
|
|
5
|
-
#
|
|
6
|
-
#
|
|
7
|
-
#
|
|
5
|
+
# Everything that has to travel together for a chat to resolve to
|
|
6
|
+
# the same model *on the same server* on every construction: the
|
|
7
|
+
# model id, the provider hint, the registry-bypass flag, and — when
|
|
8
|
+
# the model lives on a server other than the process-global
|
|
9
|
+
# +RubyLLM.config+ default — that server's base URL and API key.
|
|
8
10
|
#
|
|
9
11
|
# Bundling them is structural protection against a recurring bug
|
|
10
12
|
# class — every forwarding site (the synthesizer rescue in
|
|
11
13
|
# {Agent#run_loop}, the +agent+ tool from +pikuri-subagents+
|
|
12
|
-
# spawning a sub-agent
|
|
13
|
-
#
|
|
14
|
-
#
|
|
15
|
-
#
|
|
16
|
-
# field.
|
|
14
|
+
# spawning a sub-agent, a mid-conversation model switch) used to
|
|
15
|
+
# pass the resolution fields individually, and dropping one routed
|
|
16
|
+
# the chat to a different server or raised
|
|
17
|
+
# +RubyLLM::ModelNotFoundError+ on the unknown model id. With a
|
|
18
|
+
# single value object the call site can't silently miss a field.
|
|
19
|
+
#
|
|
20
|
+
# == Why +api_base+ / +api_key+ live here
|
|
21
|
+
#
|
|
22
|
+
# +RubyLLM::Chat#with_model+ swaps only the model/provider against
|
|
23
|
+
# the chat's *existing* connection config, so switching to a model
|
|
24
|
+
# on a different server (a small local llama.cpp vs a big cloud
|
|
25
|
+
# model) needs the connection to travel with the model — otherwise
|
|
26
|
+
# the new model id is sent to the old server's URL with the old
|
|
27
|
+
# key. {Agent} maps these two generic fields onto the provider's
|
|
28
|
+
# ruby_llm config slots (+#{provider}_api_base+ /
|
|
29
|
+
# +#{provider}_api_key+) via a per-chat +RubyLLM::Context+; both are
|
|
30
|
+
# +nil+ for a transport that rides the process-global config.
|
|
17
31
|
#
|
|
18
32
|
# Pure data carrier: no +RubyLLM+ references here, so the seam stays
|
|
19
33
|
# in {Agent}, +bin/pikuri-chat+, and {Tool}.
|
|
@@ -25,18 +39,128 @@ module Pikuri
|
|
|
25
39
|
# @return [Symbol, nil] forwarded to +RubyLLM.chat+. Required
|
|
26
40
|
# together with +assume_model_exists+ when pointing at a local
|
|
27
41
|
# OpenAI-compatible server (llama.cpp, gpustack, ...) whose model
|
|
28
|
-
# ids are not in ruby_llm's bundled registry
|
|
42
|
+
# ids are not in ruby_llm's bundled registry; required whenever
|
|
43
|
+
# +api_base+ / +api_key+ is set (it names the config slots).
|
|
29
44
|
# @!attribute [r] assume_model_exists
|
|
30
45
|
# @return [Boolean] forwarded to +RubyLLM.chat+; +true+ skips
|
|
31
46
|
# ruby_llm's registry lookup and trusts the supplied model id.
|
|
32
47
|
# Requires +provider+.
|
|
33
|
-
|
|
48
|
+
# @!attribute [r] api_base
|
|
49
|
+
# @return [String, nil] connection base URL for this model's
|
|
50
|
+
# server (e.g. +http://localhost:8080/v1+). +nil+ rides the
|
|
51
|
+
# process-global +RubyLLM.config+ base. Mapped to the provider's
|
|
52
|
+
# +#{provider}_api_base+ slot by {Agent}.
|
|
53
|
+
# @!attribute [r] api_key
|
|
54
|
+
# @return [String, nil] API key for this model's server. +nil+
|
|
55
|
+
# rides the process-global config key. Mapped to the provider's
|
|
56
|
+
# +#{provider}_api_key+ slot by {Agent}. Redacted in {#inspect}
|
|
57
|
+
# so it never leaks into a log line or backtrace.
|
|
58
|
+
# @!attribute [r] context_window
|
|
59
|
+
# @return [Integer, nil] explicit context-window cap for this
|
|
60
|
+
# model on this server, or +nil+ to defer to
|
|
61
|
+
# {ContextWindowDetector}'s probe. Travels with the model
|
|
62
|
+
# because the cap *is* a per-model-per-server property: a
|
|
63
|
+
# Ctrl+P-style switch to a different transport must carry its
|
|
64
|
+
# own cap, not inherit the previous model's. Never sent to
|
|
65
|
+
# ruby_llm (it is neither a {#chat_kwargs} entry nor a
|
|
66
|
+
# connection slot) — pure pikuri metadata read by
|
|
67
|
+
# {Agent#detect_and_emit_context_cap!}. It doubles as the cap-inheritance
|
|
68
|
+
# channel: the +agent+ tool from +pikuri-subagents+ and the
|
|
69
|
+
# synthesizer hand a spawned agent +parent.transport.with(
|
|
70
|
+
# context_window: parent.context_window_cap)+ so the parent's
|
|
71
|
+
# *resolved* cap (explicit or probed) rides along without a
|
|
72
|
+
# re-probe.
|
|
73
|
+
class ChatTransport < Data.define(:model, :provider, :assume_model_exists, :api_base, :api_key, :context_window)
|
|
74
|
+
# Build an +:openai+-provider transport for an OpenAI-compatible
|
|
75
|
+
# server (a local llama.cpp, a cloud endpoint, ...), carrying that
|
|
76
|
+
# server's connection so the agent rides a per-chat
|
|
77
|
+
# +RubyLLM::Context+ instead of the process-global +RubyLLM.config+.
|
|
78
|
+
# This is the host-boot factory the +bin/pikuri-*+ demos use in
|
|
79
|
+
# place of +RubyLLM.configure+ — one isolated connection per agent,
|
|
80
|
+
# so several agents pointed at different servers (and different
|
|
81
|
+
# keys) don't stomp a shared global.
|
|
82
|
+
#
|
|
83
|
+
# +server+ is the bare server origin; a trailing +/v1+ (the
|
|
84
|
+
# OpenAI-compatible suffix ruby_llm appends to reach
|
|
85
|
+
# +/v1/chat/completions+) is stripped and re-appended exactly once,
|
|
86
|
+
# so +https://api.x.ai+, +https://api.x.ai/v1+, and
|
|
87
|
+
# +https://api.x.ai/v1/+ all normalize to the same +.../v1+ base.
|
|
88
|
+
# Without this, a +server+ value that already ended in +/v1+ would
|
|
89
|
+
# double to +/v1/v1+ and every request would 404.
|
|
90
|
+
#
|
|
91
|
+
# @param server [String] server origin, with or without a trailing
|
|
92
|
+
# +/v1+, e.g. +"http://localhost:8080"+ or +"https://api.x.ai/v1"+
|
|
93
|
+
# @param model [String] model id served there, trusted verbatim
|
|
94
|
+
# (+assume_model_exists+ is +true+, so it need not appear in
|
|
95
|
+
# ruby_llm's bundled registry)
|
|
96
|
+
# @param api_key [String] API key for the server; defaults to the conventional
|
|
97
|
+
# +"not-needed"+ placeholder for a keyless local server
|
|
98
|
+
# @param context_window [Integer, nil] explicit context-window cap
|
|
99
|
+
# for this model, or +nil+ to defer to {ContextWindowDetector}'s
|
|
100
|
+
# +/props+ probe (the right default for a local llama.cpp, which
|
|
101
|
+
# reports its launched +n_ctx+; the right *override* for a cloud
|
|
102
|
+
# server the probe can't reach, e.g. a 2M-window model on x.ai)
|
|
103
|
+
# @return [ChatTransport] a transport whose +api_base+ is the
|
|
104
|
+
# normalized +.../v1+ URL and whose +api_key+ is +api_key+
|
|
105
|
+
def self.from_openai_server(server:, model:, api_key: 'not-needed', context_window: nil)
|
|
106
|
+
base = server.to_s.strip.chomp('/').delete_suffix('/v1')
|
|
107
|
+
new(
|
|
108
|
+
model: model,
|
|
109
|
+
provider: :openai,
|
|
110
|
+
assume_model_exists: true,
|
|
111
|
+
api_base: "#{base}/v1",
|
|
112
|
+
api_key: api_key,
|
|
113
|
+
context_window: context_window
|
|
114
|
+
)
|
|
115
|
+
end
|
|
116
|
+
|
|
34
117
|
# @param model [String, nil]
|
|
35
118
|
# @param provider [Symbol, nil]
|
|
36
119
|
# @param assume_model_exists [Boolean]
|
|
37
|
-
|
|
120
|
+
# @param api_base [String, nil]
|
|
121
|
+
# @param api_key [String, nil]
|
|
122
|
+
# @param context_window [Integer, nil]
|
|
123
|
+
# @raise [ArgumentError] if +api_base+ or +api_key+ is set without
|
|
124
|
+
# a +provider+ (the provider names the config slots the
|
|
125
|
+
# connection overrides map onto)
|
|
126
|
+
def initialize(model:, provider: nil, assume_model_exists: false,
|
|
127
|
+
api_base: nil, api_key: nil, context_window: nil)
|
|
128
|
+
if (api_base || api_key) && provider.nil?
|
|
129
|
+
raise ArgumentError, "api_base/api_key require a provider, got #{provider.inspect}"
|
|
130
|
+
end
|
|
131
|
+
|
|
38
132
|
super
|
|
39
133
|
end
|
|
134
|
+
|
|
135
|
+
# The model-resolution kwargs to spread into +RubyLLM.chat+ /
|
|
136
|
+
# +RubyLLM::Context#chat+. Excludes the connection fields — those
|
|
137
|
+
# configure the +Context+ the chat is built from, not the +chat+
|
|
138
|
+
# call itself.
|
|
139
|
+
#
|
|
140
|
+
# @return [Hash{Symbol => String, Symbol, Boolean, nil}]
|
|
141
|
+
def chat_kwargs
|
|
142
|
+
{ model: model, provider: provider, assume_model_exists: assume_model_exists }
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
# Whether this transport overrides the process-global connection
|
|
146
|
+
# (and so needs a dedicated +RubyLLM::Context+).
|
|
147
|
+
#
|
|
148
|
+
# @return [Boolean]
|
|
149
|
+
def connection_overrides?
|
|
150
|
+
!api_base.nil? || !api_key.nil?
|
|
151
|
+
end
|
|
152
|
+
|
|
153
|
+
# Default +Data#inspect+ would print +api_key+ verbatim, leaking
|
|
154
|
+
# the secret into any log line, +to_s+ interpolation, or backtrace
|
|
155
|
+
# that touches the transport. Redact it.
|
|
156
|
+
#
|
|
157
|
+
# @return [String]
|
|
158
|
+
def inspect
|
|
159
|
+
"#<#{self.class} model=#{model.inspect} provider=#{provider.inspect} " \
|
|
160
|
+
"assume_model_exists=#{assume_model_exists} api_base=#{api_base.inspect} " \
|
|
161
|
+
"api_key=#{api_key.nil? ? 'nil' : '[REDACTED]'} context_window=#{context_window.inspect}>"
|
|
162
|
+
end
|
|
163
|
+
alias to_s inspect
|
|
40
164
|
end
|
|
41
165
|
end
|
|
42
166
|
end
|
|
@@ -16,7 +16,7 @@ module Pikuri
|
|
|
16
16
|
#
|
|
17
17
|
# Pikuri::Agent.new(transport: ..., system_prompt: ...) do |c|
|
|
18
18
|
# c.add_listener Pikuri::Agent::Listener::Terminal.new
|
|
19
|
-
# c.add_tool Pikuri::Tool::
|
|
19
|
+
# c.add_tool Pikuri::Tool::WebSearch.build
|
|
20
20
|
# c.add_tool Pikuri::Tool::WEB_SCRAPE
|
|
21
21
|
# c.add_tool Pikuri::Tool::FETCH
|
|
22
22
|
# c.add_extension Pikuri::Skill::Extension.new(catalog: catalog)
|
|
@@ -116,8 +116,8 @@ module Pikuri
|
|
|
116
116
|
|
|
117
117
|
# @return [Array<#configure>] extension instances added via
|
|
118
118
|
# {#add_extension}, in declaration order. The Agent ctor
|
|
119
|
-
# walks this list and calls +bind
|
|
120
|
-
#
|
|
119
|
+
# walks this list after wiring is complete and calls +bind+
|
|
120
|
+
# on each with the agent's {ExtensionContext}.
|
|
121
121
|
attr_reader :extensions
|
|
122
122
|
|
|
123
123
|
# @param transport [Agent::ChatTransport]
|
|
@@ -219,7 +219,7 @@ module Pikuri
|
|
|
219
219
|
|
|
220
220
|
# Register an extension. The extension's +configure(self)+ is
|
|
221
221
|
# called immediately so source-order matches execution-order.
|
|
222
|
-
# The instance is also retained for the +bind(
|
|
222
|
+
# The instance is also retained for the +bind(ctx)+ sweep
|
|
223
223
|
# that runs at the end of {Agent#initialize}.
|
|
224
224
|
#
|
|
225
225
|
# Extensions must implement both +configure+ and +bind+. The
|
|
@@ -1,36 +1,55 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
+
require 'ruby_llm'
|
|
3
4
|
require 'faraday'
|
|
4
5
|
require 'json'
|
|
5
6
|
require 'cgi'
|
|
6
7
|
|
|
7
8
|
module Pikuri
|
|
8
9
|
class Agent
|
|
9
|
-
# Resolves the model's context-window cap
|
|
10
|
-
#
|
|
11
|
-
# llama.cpp +/props+
|
|
12
|
-
#
|
|
10
|
+
# Resolves the model's context-window cap by asking the server that
|
|
11
|
+
# actually serves it. The only authoritative runtime source pikuri
|
|
12
|
+
# has is llama.cpp's non-standard +/props+ endpoint, which reports
|
|
13
|
+
# the server's *launched* +n_ctx+ (the real window — possibly
|
|
14
|
+
# smaller than the model's theoretical max, e.g. +llama-server -c
|
|
15
|
+
# 8192+ on a 128k model). Returns +nil+ — an honest "we don't
|
|
16
|
+
# know" — for anything else.
|
|
13
17
|
#
|
|
14
|
-
# Used by {Agent#
|
|
15
|
-
# {Listener::TokenLog} a cap it can
|
|
16
|
-
# context size (so the +ctx=12.2k/32.0k+
|
|
17
|
-
# close the conversation is to the limit).
|
|
18
|
+
# Used by {Agent#detect_and_emit_context_cap!} at construction and
|
|
19
|
+
# after every model switch to feed {Listener::TokenLog} a cap it can
|
|
20
|
+
# render alongside the running context size (so the +ctx=12.2k/32.0k+
|
|
21
|
+
# line tells the operator how close the conversation is to the limit).
|
|
22
|
+
# The caller prefers an explicit/inherited
|
|
23
|
+
# {ChatTransport#context_window} over this probe; this runs only when
|
|
24
|
+
# the transport carries none.
|
|
18
25
|
#
|
|
19
|
-
# ==
|
|
26
|
+
# == Why no ruby_llm registry source
|
|
20
27
|
#
|
|
21
|
-
#
|
|
22
|
-
#
|
|
23
|
-
#
|
|
24
|
-
#
|
|
25
|
-
#
|
|
26
|
-
#
|
|
27
|
-
#
|
|
28
|
-
#
|
|
29
|
-
#
|
|
30
|
-
#
|
|
31
|
-
#
|
|
32
|
-
#
|
|
33
|
-
#
|
|
28
|
+
# +RubyLLM::Model::Info#context_window+ is a static lookup in a
|
|
29
|
+
# bundled +models.json+ snapshot: +nil+ for every +assume_exists+
|
|
30
|
+
# local model id, +nil+ for anything newer than the snapshot, and —
|
|
31
|
+
# worst — a *frozen* value for known models, so a window the provider
|
|
32
|
+
# later bumped (256k → 1M) still reports the old number. A cap you
|
|
33
|
+
# have to caveat defeats the cap's only job (a number trustworthy
|
|
34
|
+
# enough to act on before +RubyLLM::ContextLengthExceededError+), so
|
|
35
|
+
# pikuri deliberately does not consult it. The probe (server truth)
|
|
36
|
+
# and an explicit {ChatTransport#context_window} (operator/parent
|
|
37
|
+
# truth) are the only two sources; absent both, the cap is +nil+.
|
|
38
|
+
#
|
|
39
|
+
# == The openai-provider gate + auto-derived URL
|
|
40
|
+
#
|
|
41
|
+
# The probe only makes sense against an OpenAI-compatible local
|
|
42
|
+
# server (llama.cpp), reached through ruby_llm's +:openai+ provider
|
|
43
|
+
# with a custom base. So {.detect} runs only when
|
|
44
|
+
# +transport.provider == :openai+ and derives the probe URL from the
|
|
45
|
+
# *same* +RubyLLM.config.openai_api_base+ the chat itself uses —
|
|
46
|
+
# +/props+ lives at the host root, NOT under +/v1+, so the +/v1+
|
|
47
|
+
# suffix is stripped. Deriving from the live config (rather than a
|
|
48
|
+
# URL passed in) means the probe can't target a different server than
|
|
49
|
+
# the chat. A bare +:openai+ pointed at real +api.openai.com+ gets
|
|
50
|
+
# one fast +/props+ 404 that degrades to +nil+ (the simple gate; not
|
|
51
|
+
# worth narrowing — you're already sending that server the whole
|
|
52
|
+
# conversation).
|
|
34
53
|
#
|
|
35
54
|
# == llama.cpp router mode
|
|
36
55
|
#
|
|
@@ -62,48 +81,80 @@ module Pikuri
|
|
|
62
81
|
LOGGER = Pikuri.logger_for('ContextWindowDetector')
|
|
63
82
|
|
|
64
83
|
# Connect timeout in seconds for the llama.cpp +/props+ probe.
|
|
65
|
-
# Short on purpose:
|
|
66
|
-
#
|
|
84
|
+
# Short on purpose: a server that isn't even listening should fail
|
|
85
|
+
# fast rather than stall +Agent+ construction.
|
|
67
86
|
#
|
|
68
87
|
# @return [Integer]
|
|
69
88
|
OPEN_TIMEOUT = 2
|
|
70
|
-
# Read timeout in seconds for the llama.cpp +/props+ probe
|
|
71
|
-
#
|
|
89
|
+
# Read timeout in seconds for the llama.cpp +/props+ probe.
|
|
90
|
+
# Generous on purpose, and the reason it differs from
|
|
91
|
+
# {OPEN_TIMEOUT}: a llama.cpp router answers +/props?model=<id>+
|
|
92
|
+
# only *after* spinning up that model's instance, and a cold model
|
|
93
|
+
# load can take 10+ seconds — which the next chat turn must wait
|
|
94
|
+
# for anyway. A read timeout shorter than the load would abandon
|
|
95
|
+
# the probe (and lose the cap) precisely when switching to a
|
|
96
|
+
# cold model. A server that accepts the connection but then hangs
|
|
97
|
+
# would stall the actual chat identically, so tolerating the wait
|
|
98
|
+
# here costs nothing extra.
|
|
72
99
|
#
|
|
73
100
|
# @return [Integer]
|
|
74
|
-
READ_TIMEOUT =
|
|
75
|
-
|
|
76
|
-
#
|
|
77
|
-
#
|
|
78
|
-
#
|
|
79
|
-
#
|
|
80
|
-
#
|
|
81
|
-
#
|
|
82
|
-
# @param
|
|
83
|
-
#
|
|
84
|
-
#
|
|
85
|
-
#
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
101
|
+
READ_TIMEOUT = 30
|
|
102
|
+
|
|
103
|
+
# Resolve the context-window cap for +transport+ by probing the
|
|
104
|
+
# server that serves it.
|
|
105
|
+
#
|
|
106
|
+
# @param transport [Agent::ChatTransport] the model-resolution
|
|
107
|
+
# value object; +provider+ gates the probe and +model+ drives the
|
|
108
|
+
# router +?model=+ hop
|
|
109
|
+
# @param openai_base [String, nil] the configured OpenAI-compatible
|
|
110
|
+
# base URL the probe URL is derived from; defaults to the live
|
|
111
|
+
# +RubyLLM.config.openai_api_base+. Passed explicitly only by
|
|
112
|
+
# tests, which don't want to mutate global config.
|
|
113
|
+
# @return [Integer, nil] the launched +n_ctx+, or +nil+ for a
|
|
114
|
+
# non-+:openai+ transport, an unconfigured base, or any probe
|
|
115
|
+
# failure
|
|
116
|
+
def self.detect(transport, openai_base: RubyLLM.config.openai_api_base)
|
|
117
|
+
return nil unless transport.provider == :openai
|
|
118
|
+
|
|
119
|
+
url = props_url(openai_base)
|
|
120
|
+
return nil if url.nil?
|
|
121
|
+
|
|
122
|
+
new(probe_url: url, model_id: transport.model).probe
|
|
91
123
|
end
|
|
92
124
|
|
|
93
|
-
#
|
|
94
|
-
#
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
125
|
+
# Derive the llama.cpp +/props+ URL from the OpenAI-compatible
|
|
126
|
+
# base. +/props+ sits at the host root, so a trailing +/v1+ is
|
|
127
|
+
# stripped before appending.
|
|
128
|
+
#
|
|
129
|
+
# @param openai_base [String, nil]
|
|
130
|
+
# @return [String, nil] the +/props+ URL, or +nil+ when the base
|
|
131
|
+
# is blank
|
|
132
|
+
def self.props_url(openai_base)
|
|
133
|
+
base = openai_base.to_s.strip.chomp('/')
|
|
134
|
+
return nil if base.empty?
|
|
135
|
+
|
|
136
|
+
"#{base.delete_suffix('/v1')}/props"
|
|
137
|
+
end
|
|
138
|
+
|
|
139
|
+
# @param probe_url [String] full URL to llama.cpp +/props+
|
|
140
|
+
# @param model_id [String, nil] the chat model id, used to follow a
|
|
141
|
+
# llama.cpp router via +/props?model=<id>+ when the bare probe
|
|
142
|
+
# reports +role: router+. +nil+ or empty disables that second hop.
|
|
143
|
+
def initialize(probe_url:, model_id:)
|
|
144
|
+
@probe_url = probe_url
|
|
145
|
+
@model_id = model_id
|
|
146
|
+
end
|
|
99
147
|
|
|
148
|
+
# @return [Integer, nil] resolved cap, or +nil+ if the probe
|
|
149
|
+
# produced none
|
|
150
|
+
def probe
|
|
100
151
|
probe_llama_cpp
|
|
101
152
|
end
|
|
102
153
|
|
|
103
154
|
private
|
|
104
155
|
|
|
105
156
|
def probe_llama_cpp
|
|
106
|
-
data = fetch_props(@
|
|
157
|
+
data = fetch_props(@probe_url)
|
|
107
158
|
return nil if data.nil?
|
|
108
159
|
|
|
109
160
|
n_ctx = positive_n_ctx(data)
|
|
@@ -114,12 +165,12 @@ module Pikuri
|
|
|
114
165
|
return probe_router_model if data['role'] == 'router' && model_id_present?
|
|
115
166
|
|
|
116
167
|
warn_and_nil(
|
|
117
|
-
"no positive integer at default_generation_settings.n_ctx in #{@
|
|
168
|
+
"no positive integer at default_generation_settings.n_ctx in #{@probe_url} response"
|
|
118
169
|
)
|
|
119
170
|
end
|
|
120
171
|
|
|
121
172
|
def probe_router_model
|
|
122
|
-
url = "#{@
|
|
173
|
+
url = "#{@probe_url}?model=#{CGI.escape(@model_id)}"
|
|
123
174
|
data = fetch_props(url)
|
|
124
175
|
return nil if data.nil?
|
|
125
176
|
|
|
@@ -8,10 +8,27 @@ module Pikuri
|
|
|
8
8
|
# +Agent+ pokes {#tick!} on every +before_tool_call+
|
|
9
9
|
# callback and {#reset!} at the start of each turn. Once the
|
|
10
10
|
# counter exceeds the configured cap, {#tick!} raises
|
|
11
|
-
# {Exceeded}
|
|
12
|
-
#
|
|
13
|
-
# answer.
|
|
11
|
+
# {Exceeded} and the +Agent+ applies the {#on_exhausted}
|
|
12
|
+
# policy: re-raise to the host (the default), or run the
|
|
13
|
+
# step-exhaustion synthesizer to salvage a partial answer.
|
|
14
|
+
#
|
|
15
|
+
# == Why the policy lives here, not on +Agent+
|
|
16
|
+
#
|
|
17
|
+
# Synthesis can only ever fire off a tripped step limit, so
|
|
18
|
+
# an +Agent.new(synthesize: ...)+ kwarg would be meaningless
|
|
19
|
+
# whenever +step_limit:+ is +nil+ — an invalid combination
|
|
20
|
+
# the API would have to document away. Attaching the policy
|
|
21
|
+
# to the budget makes "what happens when the budget runs
|
|
22
|
+
# out" travel with the budget, and the nonsense state is
|
|
23
|
+
# unrepresentable. The host picks per wiring: a Q&A REPL
|
|
24
|
+
# wants +:synthesize+ (salvage an answer from the evidence
|
|
25
|
+
# gathered so far); a coding agent wants the default
|
|
26
|
+
# +:raise+ (a tools-free pass can't finish writing code —
|
|
27
|
+
# stop, let the user say "continue"; {#reset!} at the next
|
|
28
|
+
# turn boundary refreshes the budget).
|
|
14
29
|
class StepLimit
|
|
30
|
+
# Valid {#on_exhausted} policies.
|
|
31
|
+
ON_EXHAUSTED = %i[raise synthesize].freeze
|
|
15
32
|
# Raised by {#tick!} once tool-call count exceeds +max+.
|
|
16
33
|
# Carries the budget that was tripped so rescue clauses
|
|
17
34
|
# can include it in user-facing messages.
|
|
@@ -29,13 +46,25 @@ module Pikuri
|
|
|
29
46
|
# @return [Integer] the configured cap
|
|
30
47
|
attr_reader :max
|
|
31
48
|
|
|
49
|
+
# @return [Symbol] what {Agent#run_loop} does when this
|
|
50
|
+
# budget trips: +:raise+ lets {Exceeded} propagate to the
|
|
51
|
+
# host; +:synthesize+ runs the tools-free synthesizer
|
|
52
|
+
# rescue. See the class header for how to pick.
|
|
53
|
+
attr_reader :on_exhausted
|
|
54
|
+
|
|
32
55
|
# @param max [Integer] hard cap on tool-call rounds; must
|
|
33
56
|
# be positive
|
|
34
|
-
# @
|
|
35
|
-
|
|
57
|
+
# @param on_exhausted [Symbol] +:raise+ (default) or
|
|
58
|
+
# +:synthesize+ — see {#on_exhausted}
|
|
59
|
+
# @raise [ArgumentError] if +max+ is zero or negative, or
|
|
60
|
+
# +on_exhausted+ is not one of {ON_EXHAUSTED}
|
|
61
|
+
def initialize(max:, on_exhausted: :raise)
|
|
36
62
|
raise ArgumentError, "max must be positive, got #{max}" if max <= 0
|
|
63
|
+
raise ArgumentError, "on_exhausted must be one of #{ON_EXHAUSTED.inspect}, got #{on_exhausted.inspect}" \
|
|
64
|
+
unless ON_EXHAUSTED.include?(on_exhausted)
|
|
37
65
|
|
|
38
66
|
@max = max
|
|
67
|
+
@on_exhausted = on_exhausted
|
|
39
68
|
@step = 0
|
|
40
69
|
end
|
|
41
70
|
|
|
@@ -69,9 +98,12 @@ module Pikuri
|
|
|
69
98
|
# can introspect it (and so tests can assert it)
|
|
70
99
|
attr_reader :step
|
|
71
100
|
|
|
72
|
-
# @return [String] short config dump for {Agent#to_s}
|
|
101
|
+
# @return [String] short config dump for {Agent#to_s}.
|
|
102
|
+
# The policy only renders when it's the non-default
|
|
103
|
+
# +:synthesize+, so existing banner output is unchanged.
|
|
73
104
|
def to_s
|
|
74
|
-
"
|
|
105
|
+
policy = @on_exhausted == :raise ? '' : ", on_exhausted=#{@on_exhausted}"
|
|
106
|
+
"StepLimit(max=#{@max}#{policy})"
|
|
75
107
|
end
|
|
76
108
|
end
|
|
77
109
|
end
|
data/lib/pikuri/agent/event.rb
CHANGED
|
@@ -19,6 +19,18 @@ module Pikuri
|
|
|
19
19
|
# +case+-match on the variant they care about. The per-variant
|
|
20
20
|
# docs below name the emission site for each (which {Agent}
|
|
21
21
|
# callback wires it and what payload it carries).
|
|
22
|
+
#
|
|
23
|
+
# == Sealed for loop narration; gems add domain events
|
|
24
|
+
#
|
|
25
|
+
# "Sealed" applies to the *loop-narration* vocabulary: the
|
|
26
|
+
# variants below are the complete set, all emitted by {Agent},
|
|
27
|
+
# and new chat-loop observability belongs here, not in a gem.
|
|
28
|
+
# Gems may define their own *domain* events in their own
|
|
29
|
+
# namespace (e.g. +Pikuri::Tasks::ListChanged+) and emit them
|
|
30
|
+
# via {ExtensionContext#emit_event}; they ride the same stream.
|
|
31
|
+
# Listeners must no-op on variants they don't recognize —
|
|
32
|
+
# {Listener::Base#on_event}'s default plus +case+-fallthrough
|
|
33
|
+
# give that for free.
|
|
22
34
|
module Event
|
|
23
35
|
# User's input for a turn (+mid_loop: false+, the default) or a
|
|
24
36
|
# host-supplied injection delivered while the loop is running
|
|
@@ -74,10 +86,11 @@ module Pikuri
|
|
|
74
86
|
Assistant = Data.define(:content)
|
|
75
87
|
|
|
76
88
|
# Streaming fragment of an assistant reasoning block, pulled
|
|
77
|
-
# off a +RubyLLM::Chunk+ during a
|
|
78
|
-
# by the per-chunk streaming block {Agent
|
|
79
|
-
#
|
|
80
|
-
#
|
|
89
|
+
# off a +RubyLLM::Chunk+ during a streaming completion.
|
|
90
|
+
# Emitted by the per-chunk streaming block {Agent#run_loop}
|
|
91
|
+
# passes to +Chat#complete+ when the agent's +streaming:+
|
|
92
|
+
# flag is on; empty fragments are filtered at the dispatch
|
|
93
|
+
# site.
|
|
81
94
|
#
|
|
82
95
|
# Preview-only, not authoritative: the {Thinking} event
|
|
83
96
|
# emitted from +after_message+ at the end of the round-trip
|
|
@@ -100,10 +113,10 @@ module Pikuri
|
|
|
100
113
|
ThinkingDelta = Data.define(:content)
|
|
101
114
|
|
|
102
115
|
# Streaming fragment of an assistant Markdown content block,
|
|
103
|
-
# pulled off a +RubyLLM::Chunk+ during a
|
|
104
|
-
# Emitted by the per-chunk streaming block
|
|
105
|
-
# {Agent
|
|
106
|
-
#
|
|
116
|
+
# pulled off a +RubyLLM::Chunk+ during a streaming
|
|
117
|
+
# completion. Emitted by the per-chunk streaming block
|
|
118
|
+
# {Agent#run_loop} passes to +Chat#complete+ when the
|
|
119
|
+
# agent's +streaming:+ flag is on; empty fragments are
|
|
107
120
|
# filtered at the dispatch site.
|
|
108
121
|
#
|
|
109
122
|
# Preview-only, same semantics as {ThinkingDelta}: the
|
|
@@ -173,16 +186,30 @@ module Pikuri
|
|
|
173
186
|
# {Listener::TokenLog#context_window_size} tracks.
|
|
174
187
|
Tokens = Data.define(:input, :output, :cached, :cache_creation, :thinking, :model_id)
|
|
175
188
|
|
|
176
|
-
# Model's resolved context-window cap. Emitted
|
|
177
|
-
# {Agent#initialize}
|
|
178
|
-
# {Agent
|
|
179
|
-
#
|
|
180
|
-
#
|
|
181
|
-
# {Listener::TokenLog} renders +ctx=<used>/<cap>+
|
|
182
|
-
# +ctx=<used>+ when +nil+ — pick the value off this
|
|
183
|
-
# cache it; non-caring listeners ignore.
|
|
189
|
+
# Model's resolved context-window cap. Emitted at construction
|
|
190
|
+
# by {Agent#initialize}, and again after every model switch
|
|
191
|
+
# (see {Agent#run_loop}'s +transport:+) since the cap is a
|
|
192
|
+
# property of the model. Carries +nil+ when no source produced
|
|
193
|
+
# a value (a non-llama server with no explicit cap). Listeners
|
|
194
|
+
# that care — {Listener::TokenLog} renders +ctx=<used>/<cap>+
|
|
195
|
+
# when set, +ctx=<used>+ when +nil+ — pick the value off this
|
|
196
|
+
# event and cache it; non-caring listeners ignore. A second
|
|
197
|
+
# ContextCap simply overwrites the first; the conversation is
|
|
198
|
+
# not re-baselined (a switch keeps the running context size).
|
|
184
199
|
ContextCap = Data.define(:cap)
|
|
185
200
|
|
|
201
|
+
# The agent switched to a different model mid-conversation.
|
|
202
|
+
# Emitted by {Agent#run_loop} (via +apply_transport!+) just
|
|
203
|
+
# before the matching {ContextCap} for the new model. Carries
|
|
204
|
+
# the old and new {Agent::ChatTransport}s verbatim — unformatted
|
|
205
|
+
# by design, so each chrome presents them its own way (a TUI
|
|
206
|
+
# adds ANSI off +.model+, a web client adds CSS). The cap rides
|
|
207
|
+
# on the paired {ContextCap}, not here, so {Listener::TokenLog}
|
|
208
|
+
# needs no awareness of this event (its existing {ContextCap}
|
|
209
|
+
# arm picks up the new cap); a renderer wanting "switched to X
|
|
210
|
+
# (128k)" on one line correlates the two.
|
|
211
|
+
ModelSwitched = Data.define(:from, :to)
|
|
212
|
+
|
|
186
213
|
# Out-of-band notice that the agent had to take a rescue path.
|
|
187
214
|
# Emitted by {Agent#run_loop} when {Control::StepLimit} trips
|
|
188
215
|
# and the synthesizer fallback runs; carries the reason string
|