pikuri-core 0.0.4 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f26e9b56204d1fbbaf64390e643a26c4183b3c21d45e3dcc43984c677a09f400
4
- data.tar.gz: e657171d9440ef19a53ae5ad9335c62ff34ea8db62c9753ce79352f699122f06
3
+ metadata.gz: ac822a7bd46228f2eea2994c2e1428e3aa90c269e6ebafd603474fb630ba34ee
4
+ data.tar.gz: 00c69d139bc38c1a881bf87980970672517db51f59469aef266009a803a874db
5
5
  SHA512:
6
- metadata.gz: 4f53aef4566f6218750c58b0cac4d1015d9ae2d2a1d464481ff4eabae3d42eb560559517a105ccd6fb2128e0b8e1b9393fdea257757dd6c85af0dc7a47683ed3
7
- data.tar.gz: ba4adbf32911a499111eaf26057622e94fa27511aaefb5f9fb705ac3471a20d860536d1717e6933ec3e2b55af152fbfb8b479f99e11e44ea9ad907dd1c666a66
6
+ metadata.gz: d148d78b2027d747ef10f4dd7a19252f66bb1b5f99e8eef763149bf3e93ae608a3f7cf739b02ed4d92ab3ab39d8fe6888615a5acaf90dcb44ca783058a95a716
7
+ data.tar.gz: 32ef75bbd6d825970e5a1e6b5e27cf0fc812980e87e5ddd59b14f2c10772c91cf93003c2ebe2c9501f5e9682e726f77c4c387c7c83b467b9aa57dc91a9a178f0
@@ -127,8 +127,15 @@ module Pikuri
127
127
  # @param step_limit [Control::StepLimit, nil]
128
128
  # @param cancellable [Control::Cancellable, nil]
129
129
  # @param interloper [Control::Interloper, nil]
130
+ # @param on_close_sink [Array<Proc>, nil] array that {#on_close}
131
+ # appends to. {Agent#initialize} passes its own live
132
+ # +@on_close_handlers+ so a handler an extension arms via
133
+ # +c.on_close+ is reachable the instant it's registered — which
134
+ # is what lets the constructor close a half-built agent if a
135
+ # later extension's +configure+ raises. Defaults to a fresh
136
+ # array for standalone use (e.g. specs).
130
137
  def initialize(transport:, system_prompt_base:, id:, streaming:,
131
- step_limit:, cancellable:, interloper:)
138
+ step_limit:, cancellable:, interloper:, on_close_sink: nil)
132
139
  @transport = transport
133
140
  @system_prompt_base = system_prompt_base
134
141
  @id = id
@@ -141,7 +148,7 @@ module Pikuri
141
148
  @sub_agent_tools = []
142
149
  @listeners = []
143
150
  @system_prompt_additions = []
144
- @on_close_handlers = []
151
+ @on_close_handlers = on_close_sink || []
145
152
  @extensions = []
146
153
  end
147
154
 
@@ -2,6 +2,7 @@
2
2
 
3
3
  require 'faraday'
4
4
  require 'json'
5
+ require 'cgi'
5
6
 
6
7
  module Pikuri
7
8
  class Agent
@@ -31,6 +32,20 @@ module Pikuri
31
32
  # (typically +bin/pikuri-chat+) derives the right URL from its configured
32
33
  # base.
33
34
  #
35
+ # == llama.cpp router mode
36
+ #
37
+ # A llama.cpp *router* (the multi-instance front that proxies to N
38
+ # on-demand model servers) answers a bare +/props+ with
39
+ # +{"role":"router", ..., "n_ctx":0}+ — there is no single loaded
40
+ # model at the router itself, so its top-level +n_ctx+ is +0+. The
41
+ # real per-model cap is one proxied hop away: +GET /props?model=<id>+
42
+ # routes the probe to that model's instance, whose +/props+ carries
43
+ # the launched +n_ctx+. So when the bare probe reports +role: router+
44
+ # and a +model_id+ is known, this re-probes with the model id before
45
+ # giving up. A plain single-model server is untouched: its bare
46
+ # +/props+ already carries a positive +n_ctx+, so the router branch
47
+ # never runs.
48
+ #
34
49
  # == Failure handling
35
50
  #
36
51
  # The probe is best-effort. HTTP error, timeout, non-JSON body, or a
@@ -64,10 +79,15 @@ module Pikuri
64
79
  # +RubyLLM::Chat#model.context_window+
65
80
  # @param llama_probe_url [String, nil] full URL to llama.cpp +/props+;
66
81
  # +nil+ or empty string skips the probe
67
- def initialize(override:, ruby_llm_reported:, llama_probe_url:)
82
+ # @param model_id [String, nil] the chat model id, used only to
83
+ # follow a llama.cpp router via +/props?model=<id>+ when the bare
84
+ # probe reports +role: router+. +nil+ or empty disables that
85
+ # second hop.
86
+ def initialize(override:, ruby_llm_reported:, llama_probe_url:, model_id: nil)
68
87
  @override = override
69
88
  @ruby_llm_reported = ruby_llm_reported
70
89
  @llama_probe_url = llama_probe_url
90
+ @model_id = model_id
71
91
  end
72
92
 
73
93
  # @return [Integer, nil] resolved cap, or +nil+ if no source produced
@@ -83,25 +103,65 @@ module Pikuri
83
103
  private
84
104
 
85
105
  def probe_llama_cpp
86
- response = Faraday.new(
87
- request: { open_timeout: OPEN_TIMEOUT, timeout: READ_TIMEOUT }
88
- ).get(@llama_probe_url) do |req|
89
- req.headers['Accept'] = 'application/json'
90
- end
106
+ data = fetch_props(@llama_probe_url)
107
+ return nil if data.nil?
91
108
 
92
- return warn_and_nil("HTTP #{response.status} from #{@llama_probe_url}") unless response.status == 200
109
+ n_ctx = positive_n_ctx(data)
110
+ return n_ctx if n_ctx
93
111
 
94
- data = JSON.parse(response.body)
95
- n_ctx = data.dig('default_generation_settings', 'n_ctx')
96
- return n_ctx if n_ctx.is_a?(Integer) && n_ctx.positive?
112
+ # llama.cpp router: the bare /props carries no model, so its
113
+ # n_ctx is 0. Follow the router to the model's own instance.
114
+ return probe_router_model if data['role'] == 'router' && model_id_present?
97
115
 
98
116
  warn_and_nil(
99
117
  "no positive integer at default_generation_settings.n_ctx in #{@llama_probe_url} response"
100
118
  )
119
+ end
120
+
121
+ def probe_router_model
122
+ url = "#{@llama_probe_url}?model=#{CGI.escape(@model_id)}"
123
+ data = fetch_props(url)
124
+ return nil if data.nil?
125
+
126
+ n_ctx = positive_n_ctx(data)
127
+ return n_ctx if n_ctx
128
+
129
+ warn_and_nil(
130
+ "no positive integer at default_generation_settings.n_ctx in router probe #{url}"
131
+ )
132
+ end
133
+
134
+ # GETs +url+ and parses its JSON body.
135
+ #
136
+ # @param url [String] a llama.cpp +/props+ URL
137
+ # @return [Hash, nil] the parsed body, or +nil+ (after one +warn+
138
+ # line) on non-200, timeout, transport error, or non-JSON body
139
+ def fetch_props(url)
140
+ response = Faraday.new(
141
+ request: { open_timeout: OPEN_TIMEOUT, timeout: READ_TIMEOUT }
142
+ ).get(url) do |req|
143
+ req.headers['Accept'] = 'application/json'
144
+ end
145
+
146
+ return warn_and_nil("HTTP #{response.status} from #{url}") unless response.status == 200
147
+
148
+ JSON.parse(response.body)
101
149
  rescue Faraday::Error, JSON::ParserError => e
102
150
  warn_and_nil("#{e.class.name.split('::').last}: #{e.message}")
103
151
  end
104
152
 
153
+ # @param data [Hash] a parsed +/props+ body
154
+ # @return [Integer, nil] the launched +n_ctx+ when present and
155
+ # positive, else +nil+
156
+ def positive_n_ctx(data)
157
+ n_ctx = data.dig('default_generation_settings', 'n_ctx')
158
+ n_ctx if n_ctx.is_a?(Integer) && n_ctx.positive?
159
+ end
160
+
161
+ def model_id_present?
162
+ !@model_id.nil? && !@model_id.empty?
163
+ end
164
+
105
165
  def warn_and_nil(reason)
106
166
  LOGGER.warn("llama.cpp /props probe failed: #{reason}")
107
167
  nil
@@ -123,6 +123,14 @@ module Pikuri
123
123
  @mutex.synchronize { !@items.empty? }
124
124
  end
125
125
 
126
+ # @return [Integer] number of pending injections; like
127
+ # {#pending?} and {#peek}, a snapshot observable from
128
+ # any thread — by the time the caller reads it the
129
+ # queue may already have drained
130
+ def size
131
+ @mutex.synchronize { @items.size }
132
+ end
133
+
126
134
  # Atomically take and remove all pending items. Called by
127
135
  # {Agent}'s +after_tool_result+ wiring; the +Agent+ then
128
136
  # appends each item to the chat history and emits an
@@ -147,8 +155,8 @@ module Pikuri
147
155
  # the pending-count so a debug print or banner can tell
148
156
  # an idle interloper apart from one with queued items
149
157
  def to_s
150
- size = @mutex.synchronize { @items.size }
151
- size.zero? ? 'Interloper' : "Interloper(#{size} pending)"
158
+ pending = size
159
+ pending.zero? ? 'Interloper' : "Interloper(#{pending} pending)"
152
160
  end
153
161
  end
154
162
  end
@@ -45,6 +45,21 @@ module Pikuri
45
45
  end
46
46
  end
47
47
 
48
+ # A system-role block an {Extension#on_user_message} hook
49
+ # injected into the chat log — recalled reference (memory
50
+ # context, retrieved snippets) tagged +role: :system+ so the
51
+ # model reads it as background, not new user input. Carries
52
+ # the injected text verbatim.
53
+ #
54
+ # Emitted by {Agent#dispatch_ext_on_user_message}, once per
55
+ # extension that returns a non-empty block, at the same site
56
+ # that grows the chat log — so the event stream stays a
57
+ # faithful mirror of what the model actually sees. Without it
58
+ # an injection is invisible: it never surfaces in the stream,
59
+ # only as a secondary echo in the assistant's later reasoning.
60
+ # {Listener::Terminal} renders it dim grey with a +⊕+ marker.
61
+ SystemInjected = Data.define(:content)
62
+
48
63
  # Assistant reasoning ("thinking") block, extracted from the
49
64
  # +thinking.text+ field on a +RubyLLM::Message+ with role
50
65
  # +:assistant+. Emitted by {Agent}'s +after_message+ wiring;
@@ -5,17 +5,20 @@ module Pikuri
5
5
  # The Extension protocol — how hosts bolt extra capabilities
6
6
  # (system-prompt snippets, tools, lifecycle hooks) onto an
7
7
  # {Agent}. Extensions are added via {Configurator#add_extension}
8
- # inside the +Agent.new+ block; the Agent then drives two hooks
9
- # on each — {#configure} during the block, {#bind} once the
10
- # agent is fully constructed.
8
+ # inside the +Agent.new+ block; the Agent then drives three hooks
9
+ # on each — {#configure} during the block, {#bind} once the agent
10
+ # is fully constructed, and {#on_user_message} on every user turn
11
+ # thereafter.
11
12
  #
12
13
  # Mix this module into an extension class to inherit empty
13
- # default implementations of both hooks; override the ones you
14
- # need. Extensions that don't +include+ this module still work
15
- # if they define both methods themselves (the Agent and
16
- # Configurator call them by name) the module exists to make
17
- # the protocol *explicit* and to give "I want to implement just
18
- # +configure+" extensions a free no-op +bind+ (and vice versa).
14
+ # default implementations of all three hooks; override the ones
15
+ # you need. Extensions that don't +include+ this module still
16
+ # work *if they define all three methods themselves* — the Agent
17
+ # and Configurator call them by name with no +respond_to?+ guard,
18
+ # so a missing one raises. The module exists to make the protocol
19
+ # *explicit* and to give "I want to implement just +configure+"
20
+ # extensions free no-op +bind+ / +on_user_message+ defaults (and
21
+ # any other combination).
19
22
  #
20
23
  # == Example
21
24
  #
@@ -75,6 +78,31 @@ module Pikuri
75
78
  # @param agent [Agent] the live agent, fully wired
76
79
  # @return [void]
77
80
  def bind(agent); end
81
+
82
+ # Optional per-turn hook fired by the {Agent} after a user message
83
+ # is added to the chat. The
84
+ # default is a no-op returning +nil+; override and return a {String}
85
+ # to emit a +:system+ message with that text.
86
+ #
87
+ # == Append-only, never mutate
88
+ #
89
+ # The Agent only ever *appends* the returned block at the tail; it never
90
+ # rewrites or removes an earlier one. Mutating mid-log would bust the
91
+ # provider prefix cache for every message after the edit. Stale blocks
92
+ # ride the existing context-window machinery, not a per-turn rewrite.
93
+ #
94
+ # == Not inherited by sub-agents
95
+ #
96
+ # Like the rest of the extension surface, this fires on the parent agent
97
+ # only — sub-agents do not inherit extensions, so a persona's turns are
98
+ # never prefetched or recorded by the parent's memory.
99
+ #
100
+ # @param agent [Agent] the live agent whose turn this is
101
+ # @param content [String] the user message (initial or interloper) about
102
+ # to be sent to the model
103
+ # @return [String, nil] an optional block of text to be injected verbatim as
104
+ # a system-role message (after the user message), or +nil+ to inject nothing
105
+ def on_user_message(agent, content); end
78
106
  end
79
107
  end
80
108
  end
@@ -1,19 +1,32 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'rainbow'
4
- require 'tty-markdown'
5
4
 
6
5
  module Pikuri
7
6
  class Agent
8
7
  module Listener
9
8
  # Terminal renderer for the normalized event stream: dim grey
10
- # reasoning, Markdown-rendered assistant content, cyan tool-
11
- # call and tool-result lines, yellow fallback notice, red
12
- # cancelled notice. {Event::UserTurn} is intentionally silent
9
+ # reasoning, assistant content printed raw (Markdown as-is),
10
+ # cyan tool-call and tool-result lines, yellow fallback
11
+ # notice, red cancelled notice. An {Event::SystemInjected} block (recalled
12
+ # memory / context an extension injected) renders dim grey
13
+ # with a +⊕+ marker. {Event::UserTurn} is intentionally silent
13
14
  # (the terminal user just typed the message, so re-rendering
14
15
  # it adds nothing); {Event::Tokens} and {Event::ContextCap}
15
16
  # are silent too (their consumer is {TokenLog}).
16
17
  #
18
+ # Assistant Markdown deliberately prints raw, with no
19
+ # Markdown-to-ANSI rendering. A renderer (+tty-markdown+)
20
+ # used to sit on the non-streaming path; it was dropped:
21
+ # rendering can never apply to the streaming path anyway
22
+ # (half-finished Markdown — broken code fences, half-built
23
+ # tables — doesn't render), the gem hadn't shipped a release
24
+ # since 2023 (its known ANSI-in-table crashes forced a
25
+ # rescue-and-degrade carve-out here), and it pulled seven
26
+ # transitive gems into the audit surface. Raw Markdown is
27
+ # perfectly readable in a terminal; proper rendering belongs
28
+ # to a richer host (the planned pikuri-tui).
29
+ #
17
30
  # Optionally prepends a fixed number of leading spaces to
18
31
  # every rendered line via the +padding:+ kwarg. Sub-agents
19
32
  # get a fresh padded instance through {#for_sub_agent}
@@ -28,11 +41,8 @@ module Pikuri
28
41
  # - {Event::ThinkingDelta} fragments print live in the same
29
42
  # dim grey as the non-streaming {Event::Thinking}, with no
30
43
  # trailing newline so the next fragment continues the line.
31
- # - {Event::AssistantDelta} fragments print live, *raw* — no
32
- # Markdown render. tty-markdown can't render half-finished
33
- # Markdown (broken code blocks, half-rendered tables), so
34
- # the live stream gives up formatting in exchange for
35
- # liveness.
44
+ # - {Event::AssistantDelta} fragments print live the same
45
+ # way, uncolored.
36
46
  # - {Event::Thinking} and {Event::Assistant} bookends print
37
47
  # a single blank line as a stream terminator, not their
38
48
  # content (the content already landed via the deltas). The
@@ -43,15 +53,6 @@ module Pikuri
43
53
  # the deltas are silently ignored and the bookend events
44
54
  # render the full text the way they always have.
45
55
  class Terminal < Base
46
- # Subsystem logger; set its level with +PIKURI_LOG_TERMINAL+
47
- # or the global +PIKURI_LOG+. Used for the narrow rescue
48
- # around third-party rendering (+tty-markdown+ choking on
49
- # assistant output) — see the CLAUDE.md "secondary to the
50
- # loop" carve-out.
51
- #
52
- # @return [Logger]
53
- LOGGER = Pikuri.logger_for('Terminal')
54
-
55
56
  # Cap, in characters, applied to tool-result content
56
57
  # rendered to the terminal. Anything longer is truncated
57
58
  # with a marker that reports the original byte size so the
@@ -117,12 +118,14 @@ module Pikuri
117
118
  if @streaming
118
119
  terminate_stream
119
120
  else
120
- println(indent(render_markdown(content)))
121
+ println(indent(content))
121
122
  end
122
123
  in Event::ThinkingDelta(content:)
123
124
  stream_fragment(Rainbow(content).color(85, 85, 85)) if @streaming
124
125
  in Event::AssistantDelta(content:)
125
126
  stream_fragment(content) if @streaming
127
+ in Event::SystemInjected(content:)
128
+ println(indent(Rainbow("⊕ #{content}").color(85, 85, 85)))
126
129
  in Event::ToolCall(name:, arguments:)
127
130
  args = arguments.map { |k, v| "#{k}=#{v.inspect}" }.join(', ')
128
131
  println(indent(Rainbow("→ #{name}(#{args})").cyan))
@@ -224,23 +227,6 @@ module Pikuri
224
227
  text.to_s.each_line.map { |line| prefix + line }.join
225
228
  end
226
229
 
227
- # Render assistant Markdown for the terminal, degrading to
228
- # the raw string when the renderer raises. tty-markdown /
229
- # strings have known bugs around ANSI inside tables (e.g.
230
- # +Strings::Wrap.insert_ansi+ raising +IndexError+); we'd
231
- # rather show ugly Markdown than abort an in-flight
232
- # conversation.
233
- #
234
- # @param content [String] assistant Markdown
235
- # @return [String] rendered ANSI text, or +content+
236
- # unchanged on render failure
237
- def render_markdown(content)
238
- TTY::Markdown.parse(content)
239
- rescue StandardError => e
240
- LOGGER.warn("TTY::Markdown render failed (#{e.class}: #{e.message}); falling back to raw text")
241
- content
242
- end
243
-
244
230
  # Flatten whitespace and cap to {MAX_TOOL_RESULT_CHARS}. The
245
231
  # cap keeps multi-screen dumps (rendered HTML, PDF text)
246
232
  # from drowning the terminal stream; the byte-count suffix