brute 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. checksums.yaml +7 -0
  2. data/lib/brute/agent_stream.rb +49 -0
  3. data/lib/brute/compactor.rb +105 -0
  4. data/lib/brute/doom_loop.rb +84 -0
  5. data/lib/brute/file_mutation_queue.rb +99 -0
  6. data/lib/brute/hooks.rb +84 -0
  7. data/lib/brute/middleware/base.rb +27 -0
  8. data/lib/brute/middleware/compaction_check.rb +56 -0
  9. data/lib/brute/middleware/doom_loop_detection.rb +33 -0
  10. data/lib/brute/middleware/llm_call.rb +28 -0
  11. data/lib/brute/middleware/reasoning_normalizer.rb +98 -0
  12. data/lib/brute/middleware/retry.rb +45 -0
  13. data/lib/brute/middleware/session_persistence.rb +29 -0
  14. data/lib/brute/middleware/token_tracking.rb +46 -0
  15. data/lib/brute/middleware/tool_error_tracking.rb +46 -0
  16. data/lib/brute/middleware/tracing.rb +34 -0
  17. data/lib/brute/orchestrator.rb +297 -0
  18. data/lib/brute/patches/anthropic_tool_role.rb +35 -0
  19. data/lib/brute/patches/buffer_nil_guard.rb +21 -0
  20. data/lib/brute/pipeline.rb +81 -0
  21. data/lib/brute/session.rb +86 -0
  22. data/lib/brute/snapshot_store.rb +49 -0
  23. data/lib/brute/system_prompt.rb +88 -0
  24. data/lib/brute/todo_store.rb +27 -0
  25. data/lib/brute/tools/delegate.rb +35 -0
  26. data/lib/brute/tools/fs_patch.rb +37 -0
  27. data/lib/brute/tools/fs_read.rb +37 -0
  28. data/lib/brute/tools/fs_remove.rb +31 -0
  29. data/lib/brute/tools/fs_search.rb +38 -0
  30. data/lib/brute/tools/fs_undo.rb +29 -0
  31. data/lib/brute/tools/fs_write.rb +26 -0
  32. data/lib/brute/tools/net_fetch.rb +37 -0
  33. data/lib/brute/tools/shell.rb +38 -0
  34. data/lib/brute/tools/todo_read.rb +15 -0
  35. data/lib/brute/tools/todo_write.rb +32 -0
  36. data/lib/brute.rb +121 -0
  37. metadata +101 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: d81acc813055cd8621c71b3d1c444b9c39cf9c316b0e4b41eddab60a3f27f85a
4
+ data.tar.gz: 746bd2d574c6203e80153d0c747e55cecff746b5aa61ae1b38c9471aaee8feef
5
+ SHA512:
6
+ metadata.gz: c18662ee0a25508ebd7df4015454b4b6e77eb7fa7e5a3f69fe2b9b1e64a8f0392fd1478cde5e5f310c85e1a683302346cc0528cb20beab80705029331ed7e4e7
7
+ data.tar.gz: 60047800ecee00b2c959ca9f12385540b2be949ebfffa3f16312dc534a0342a82e5bebe0227228461e2fcba634a76a38df7d3c70bcada0c40a3be37f82c156c0
@@ -0,0 +1,49 @@
1
# frozen_string_literal: true

module Brute
  # Bridges llm.rb's streaming callbacks to forge-rb's callback system.
  #
  # Text and reasoning chunks are forwarded to the supplied callbacks the
  # moment they arrive. Tool calls are executed on background threads so
  # they can run while the rest of the response is still streaming; the
  # on_tool_result callback fires as each thread completes.
  class AgentStream < LLM::Stream
    # @param on_content     [Proc, nil] receives each streamed text chunk
    # @param on_reasoning   [Proc, nil] receives each reasoning/thinking chunk
    # @param on_tool_call   [Proc, nil] receives (tool_name, arguments) when a call arrives
    # @param on_tool_result [Proc, nil] receives (tool_name, result_value) when a call finishes
    def initialize(on_content: nil, on_reasoning: nil, on_tool_call: nil, on_tool_result: nil)
      @on_content = on_content
      @on_reasoning = on_reasoning
      @on_tool_call = on_tool_call
      @on_tool_result = on_tool_result
    end

    # Forward a streamed text chunk.
    def on_content(text) = @on_content&.call(text)

    # Forward a streamed reasoning/thinking chunk.
    def on_reasoning_content(text) = @on_reasoning&.call(text)

    # Announce the tool call, then either surface the provider-reported
    # error immediately or start the tool on its own thread.
    def on_tool_call(tool, error)
      @on_tool_call&.call(tool.name, tool.arguments)

      if error
        queue << error
        @on_tool_result&.call(tool.name, error.value)
      else
        queue << LLM::Function::Task.new(spawn_with_callback(tool))
      end
    end

    private

    # Run the tool on a background thread, firing on_tool_result when it
    # finishes. The callback and name are captured in locals so the thread
    # body never touches instance state.
    def spawn_with_callback(tool)
      notify = @on_tool_result
      tool_name = tool.name
      Thread.new do
        outcome = tool.call
        payload = outcome.respond_to?(:value) ? outcome.value : outcome
        notify&.call(tool_name, payload)
        outcome
      end
    end
  end
end
@@ -0,0 +1,105 @@
1
# frozen_string_literal: true

module Brute
  # Context compaction service. Once the conversation grows past configurable
  # size thresholds, the older portion of the history is summarized by an LLM
  # call and the originals are dropped, keeping the context window manageable.
  #
  # Modeled after forgecode's Compactor, which uses an eviction window and
  # retention window strategy.
  class Compactor
    DEFAULTS = {
      token_threshold: 100_000, # compact when estimated tokens exceed this
      message_threshold: 200,   # compact when message count exceeds this
      retention_window: 6,      # minimum recent messages always kept verbatim
      summary_model: nil,       # summarization model (nil -> built-in default)
    }.freeze

    attr_reader :config

    # @param provider [#complete] LLM provider used for summarization.
    # @param opts [Hash] per-instance overrides for DEFAULTS keys.
    def initialize(provider, **opts)
      @provider = provider
      @config = DEFAULTS.merge(opts)
    end

    # True when either the message count or the reported token usage has
    # crossed its configured threshold.
    #
    # @param messages [Array] current conversation history
    # @param usage [#total_tokens, nil] usage report from the last response
    def should_compact?(messages, usage: nil)
      return true if messages.size > @config[:message_threshold]

      token_count = usage ? (usage.total_tokens || 0) : 0
      token_count > @config[:token_threshold]
    end

    # Summarize the older portion of +messages+, always keeping the most
    # recent `retention_window` messages verbatim.
    #
    # @return [Array(String, Array), nil] [summary_text, kept_messages] for
    #   the caller to rebuild the context from, or nil when nothing is old
    #   enough to evict.
    def compact(messages)
      retained = [@config[:retention_window], messages.size].min
      evicted = messages.size - retained
      return nil if evicted <= 0

      summary = summarize(messages.take(evicted))
      [summary, messages.drop(evicted)]
    end

    private

    # Render the evicted messages as plain text and ask the provider for a
    # structured summary the agent can resume from.
    def summarize(messages)
      conversation_text = messages.map { |msg| render_message(msg) }.join("\n---\n")

      prompt = <<~PROMPT
        Summarize this conversation history for context continuity. The summary will replace
        these messages in the context window, so include everything the agent needs to continue
        working effectively.

        Structure your summary as:
        ## Goal
        What the user asked for.

        ## Progress
        - Files read, created, or modified (list paths)
        - Commands executed and their outcomes
        - Key decisions made

        ## Current State
        Where things stand right now — what's done and what remains.

        ## Next Steps
        What should happen next based on the conversation.

        ---
        CONVERSATION:
        #{conversation_text}
      PROMPT

      model = @config[:summary_model] || "claude-sonnet-4-20250514"
      @provider.complete(prompt, model: model).content
    end

    # One-line condensed form of a message: role, any tool calls, and the
    # content truncated for the summarizer. Duck-typed: objects without
    # #role/#content fall back to "unknown"/#to_s.
    def render_message(msg)
      role = msg.respond_to?(:role) ? msg.role.to_s : "unknown"
      content = (msg.respond_to?(:content) ? msg.content : msg).to_s[0..1000]

      tool_info = ""
      if msg.respond_to?(:functions) && msg.functions&.any?
        calls = msg.functions.map { |f| "#{f.name}(#{f.arguments.to_s[0..200]})" }
        tool_info = " [tools: #{calls.join(", ")}]"
      end

      "#{role}:#{tool_info} #{content}"
    end
  end
end
@@ -0,0 +1,84 @@
1
# frozen_string_literal: true

module Brute
  # Detects when the agent is stuck in a repeating pattern of tool calls.
  #
  # Two loop shapes are caught:
  # 1. Consecutive identical calls: [A, A, A] — same tool + same args
  # 2. Repeating sequences: [A,B,C, A,B,C, A,B,C] — a multi-call cycle
  #
  # When detected, the caller injects #warning_message into the context so
  # the LLM can course-correct.
  class DoomLoopDetector
    DEFAULT_THRESHOLD = 3

    attr_reader :threshold

    # @param threshold [Integer] minimum repetitions before a loop is reported
    def initialize(threshold: DEFAULT_THRESHOLD)
      @threshold = threshold
    end

    # Extracts tool call signatures from the message history and checks for
    # repeating patterns at the tail.
    #
    # @param messages [Array] conversation messages (duck-typed)
    # @return [Integer, nil] the repetition count if a loop is found, else nil
    def detect(messages)
      signatures = extract_signatures(messages)
      return nil if signatures.size < @threshold

      check_repeating_pattern(signatures)
    end

    # Build a human-readable warning message for the agent.
    def warning_message(repetitions)
      <<~MSG
        SYSTEM NOTICE: Doom loop detected — the same tool call pattern has repeated #{repetitions} times.
        You are stuck in a loop and not making progress. Stop and try a fundamentally different approach:
        - Re-read the file to check your changes actually applied
        - Try a different tool or strategy
        - Break the problem into smaller steps
        - If a command keeps failing, investigate why before retrying
      MSG
    end

    private

    # Extract [tool_name, arguments_json] pairs from assistant messages.
    # FIX: guard against #functions returning nil (an assistant message with
    # no tool calls) — previously nil.map raised NoMethodError. This matches
    # the `m.functions&.any?` guard used elsewhere in this gem.
    def extract_signatures(messages)
      messages
        .select { |m| m.respond_to?(:functions) && m.assistant? }
        .flat_map { |m| (m.functions || []).map { |f| [f.name.to_s, f.arguments.to_s] } }
    end

    # Check for repeating patterns of any length at the tail of the sequence.
    # Only pattern lengths that could fit @threshold times are considered.
    #
    # @return [Integer, nil] the repetition count, or nil
    def check_repeating_pattern(sequence)
      max_pattern_len = sequence.size / @threshold

      (1..max_pattern_len).each do |pattern_len|
        count = count_tail_repetitions(sequence, pattern_len)
        return count if count >= @threshold
      end

      nil
    end

    # Count how many times the final `length`-element pattern repeats,
    # scanning backward from the end of the sequence.
    def count_tail_repetitions(sequence, length)
      return 0 if sequence.size < length

      pattern = sequence.last(length)
      count = 1
      pos = sequence.size - length

      while pos >= length
        break unless sequence[(pos - length)...pos] == pattern

        count += 1
        pos -= length
      end

      count
    end
  end
end
@@ -0,0 +1,99 @@
1
# frozen_string_literal: true

module Brute
  # Per-file serialization queue for concurrent tool execution.
  #
  # When tools run in parallel (via threads or async fibers), multiple tools
  # may target the same file simultaneously. Without serialization, a sequence
  # like [read -> patch -> write] on the same file would race and lose edits.
  #
  # This module provides a single public method:
  #
  #   Brute::FileMutationQueue.serialize("/path/to/file") do
  #     # snapshot + read + modify + write — all atomic for this path
  #   end
  #
  # Design (mirrors pi-mono's withFileMutationQueue):
  # - Operations on the SAME file are serialized (run one at a time)
  # - Operations on DIFFERENT files run fully in parallel (independent mutexes)
  # - Symlink-aware: resolves real paths so aliases share one mutex
  # - Error-safe: the mutex is always released, so failures never deadlock
  # - Self-cleaning: per-file mutexes are removed when no longer in use
  #
  # Ruby 3.4's Mutex is fiber-scheduler-aware, so this works correctly with
  # both :thread and :task (Async) concurrency strategies.
  module FileMutationQueue
    @mutexes = {}          # canonical path -> Mutex
    @waiters = Hash.new(0) # canonical path -> threads/fibers waiting or holding
    @guard = Mutex.new     # protects @mutexes and @waiters

    class << self
      # Serialize a block of work for a given file path.
      #
      # Concurrent calls targeting the same canonical path execute
      # sequentially; calls targeting different paths proceed in parallel
      # with zero contention.
      #
      # @param path [String] the file path to serialize on
      # @yield the mutation work to perform (snapshot, read, write, etc.)
      # @return whatever the block returns
      def serialize(path, &block)
        key = canonical_path(path)
        mutex = acquire_mutex(key)
        # FIX: release only after a successful acquire. The original used a
        # method-level `ensure`, which also ran when canonical_path or
        # acquire_mutex raised — decrementing a waiter count that was never
        # incremented and corrupting the bookkeeping.
        begin
          mutex.synchronize(&block)
        ensure
          release_mutex(key)
        end
      end

      # Clear all tracked mutexes. Used in tests and session resets.
      def clear!
        @guard.synchronize do
          @mutexes.clear
          @waiters.clear
        end
      end

      # Number of file paths currently tracked (for diagnostics).
      def size
        @guard.synchronize { @mutexes.size }
      end

      private

      # Resolve a file path to a canonical key. File.realpath follows
      # symlinks so aliases to the same underlying file share one mutex.
      # FIX: rescue SystemCallError rather than only Errno::ENOENT —
      # realpath can also fail with ENOTDIR, EACCES, ELOOP, etc.; in every
      # such case we fall back to the expanded path as the key.
      def canonical_path(path)
        expanded = File.expand_path(path)
        File.realpath(expanded)
      rescue SystemCallError
        expanded
      end

      # Get (or create) the mutex for a path and increment its waiter count.
      def acquire_mutex(key)
        @guard.synchronize do
          @mutexes[key] ||= Mutex.new
          @waiters[key] += 1
          @mutexes[key]
        end
      end

      # Decrement the waiter count; drop the mutex once no one needs it.
      def release_mutex(key)
        @guard.synchronize do
          @waiters[key] -= 1
          if @waiters[key] <= 0
            @mutexes.delete(key)
            @waiters.delete(key)
          end
        end
      end
    end
  end
end
@@ -0,0 +1,84 @@
1
# frozen_string_literal: true

module Brute
  # Lifecycle hook system modeled after forgecode's Hook struct.
  #
  # Six lifecycle events fire during the orchestrator loop:
  #   :start          — conversation processing begins
  #   :end            — conversation processing ends
  #   :request        — before each LLM API call
  #   :response       — after each LLM response
  #   :toolcall_start — before a tool executes
  #   :toolcall_end   — after a tool executes
  #
  # Hooks receive (event_name, context_hash) and can inspect or mutate the
  # orchestrator state via the context hash.
  module Hooks
    # Base class. Subclass and override the #on_<event> methods you need.
    class Base
      # Dispatch +event+ to the matching on_<event> handler, if one exists
      # (private handlers count — respond_to? is called with include_all).
      def call(event, **data)
        handler = :"on_#{event}"
        send(handler, **data) if respond_to?(handler, true)
      end

      private

      # Default no-op handlers; subclasses override what they need.
      def on_start(**) = nil
      def on_end(**) = nil
      def on_request(**) = nil
      def on_response(**) = nil
      def on_toolcall_start(**) = nil
      def on_toolcall_end(**) = nil
    end

    # Fans one event out to several hooks, preserving registration order.
    class Composite < Base
      def initialize(*hooks)
        @hooks = hooks
      end

      # Fire every registered hook for the event, in order.
      def call(event, **data)
        @hooks.each { |hook| hook.call(event, **data) }
      end

      # Register another hook; returns self so appends can be chained.
      def <<(hook)
        @hooks << hook
        self
      end
    end

    # Writes lifecycle events to a logger.
    class Logging < Base
      def initialize(logger)
        @logger = logger
      end

      private

      def on_start(**) = @logger.info("[brute] Conversation started")

      def on_end(**) = @logger.info("[brute] Conversation ended")

      def on_request(request_count: 0, **) = @logger.debug("[brute] LLM request ##{request_count}")

      def on_response(tokens: nil, **) = @logger.debug("[brute] LLM response (tokens: #{tokens || "?"})")

      def on_toolcall_start(tool_name: nil, **) = @logger.info("[brute] Tool call: #{tool_name}")

      def on_toolcall_end(tool_name: nil, error: false, **)
        @logger.info("[brute] Tool result: #{tool_name} [#{error ? "FAILED" : "ok"}]")
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
# frozen_string_literal: true

module Brute
  module Middleware
    # Base class for all middleware, following the Rack convention:
    #
    #   def call(env)
    #     # pre-processing
    #     response = @app.call(env)
    #     # post-processing
    #     response
    #   end
    #
    # Subclasses MUST invoke @app.call(env) unless they intentionally
    # short-circuit (e.g. return a cached response).
    class Base
      # @param app [#call] the next middleware (or terminal app) in the chain
      def initialize(app)
        @app = app
      end

      # Default behavior: pass the env straight through to the next app.
      def call(env) = @app.call(env)
    end
  end
end
@@ -0,0 +1,56 @@
1
# frozen_string_literal: true

require "time" # FIX: Time#iso8601 is defined by the stdlib "time" extension

module Brute
  module Middleware
    # Checks context size after each LLM call and triggers compaction
    # when thresholds are exceeded.
    #
    # Runs POST-call: inspects message count and token usage from the
    # response. If compaction is needed, summarizes older messages and
    # rebuilds the context with the summary + recent messages.
    class CompactionCheck < Base
      # @param compactor [Brute::Compactor] decides when and how to compact
      # @param system_prompt [String] re-installed into the rebuilt context
      # @param tools [Array] tool set re-attached to the rebuilt context
      def initialize(app, compactor:, system_prompt:, tools:)
        super(app)
        @compactor = compactor
        @system_prompt = system_prompt
        @tools = tools
      end

      def call(env)
        response = @app.call(env)

        ctx = env[:context]
        messages = ctx.messages.to_a.compact
        # FIX: the original used `ctx.usage rescue nil`, which silently
        # swallowed every StandardError; only tolerate a missing method.
        usage = ctx.respond_to?(:usage) ? ctx.usage : nil

        if @compactor.should_compact?(messages, usage: usage)
          if (result = @compactor.compact(messages))
            summary_text, _recent = result
            rebuild_context!(env, summary_text)
            env[:metadata][:compaction] = {
              messages_before: messages.size,
              timestamp: Time.now.iso8601,
            }
          end
        end

        response
      end

      private

      # Replace env[:context] with a fresh context seeded by the system
      # prompt and the conversation summary.
      def rebuild_context!(env, summary_text)
        provider = env[:provider]
        new_ctx = LLM::Context.new(provider, tools: @tools)
        prompt = new_ctx.prompt do |p|
          p.system @system_prompt
          p.user "[Previous conversation summary]\n\n#{summary_text}"
        end
        new_ctx.talk(prompt)
        env[:context] = new_ctx
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
# frozen_string_literal: true

module Brute
  module Middleware
    # Detects when the agent is stuck repeating tool call patterns and
    # injects a corrective warning into the context before the next LLM call.
    #
    # Runs PRE-call: scans the conversation history for repeating tool call
    # patterns. On a hit, talks a warning message into the context so the
    # LLM sees it as input alongside the normal tool results.
    class DoomLoopDetection < Base
      # @param threshold [Integer] repetitions required to report a loop
      def initialize(app, threshold: 3)
        super(app)
        @detector = Brute::DoomLoopDetector.new(threshold: threshold)
      end

      def call(env)
        ctx = env[:context]
        repetitions = @detector.detect(ctx.messages.to_a)

        if repetitions
          # Inject the warning as a user message so the LLM sees it.
          ctx.talk(@detector.warning_message(repetitions))
          env[:metadata][:doom_loop_detected] = repetitions
        end

        @app.call(env)
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
# frozen_string_literal: true

module Brute
  module Middleware
    # The terminal "app" in the pipeline — performs the actual LLM call.
    #
    # When streaming, on_content fires incrementally via AgentStream.
    # When not streaming, fires on_content post-hoc with the full text.
    class LLMCall
      def call(env)
        response = env[:context].talk(env[:input])

        # Streaming responses already delivered their chunks incrementally
        # via AgentStream, so only replay the full text when not streaming.
        deliver_content(env, response) unless env[:streaming]

        response
      end

      private

      # Invoke the on_content callback with the response text, when both
      # the callback and a textual response are present.
      def deliver_content(env, response)
        callback = env.dig(:callbacks, :on_content)
        return unless callback && response

        text = response.respond_to?(:content) ? response.content : nil
        callback.call(text) if text
      end
    end
  end
end
@@ -0,0 +1,98 @@
1
# frozen_string_literal: true

module Brute
  module Middleware
    # Handles reasoning/thinking content across model switches.
    #
    # PRE-call:
    # - If reasoning is enabled, injects provider-specific params into the
    #   env (e.g. Anthropic thinking config, OpenAI reasoning_effort).
    # - Tracks which model produced each message. When the model changes,
    #   reasoning_content from the old model should be stripped (signatures
    #   are model-specific and cryptographically tied).
    #
    # POST-call:
    # - Records the current model for each response for future normalization.
    #
    # llm.rb exposes:
    # - response.reasoning_content — the thinking text
    # - response.reasoning_tokens  — token count
    # - provider params pass-through for thinking:, reasoning_effort:, etc.
    class ReasoningNormalizer < Base
      # Effort levels mapped to provider-specific param values.
      # Mirrors forgecode's Effort enum.
      EFFORT_LEVELS = {
        none: "none",
        minimal: "low",
        low: "low",
        medium: "medium",
        high: "high",
        xhigh: "high",
        max: "high",
      }.freeze

      # Update the active model (e.g. when the user switches mid-session).
      attr_writer :model_id

      # @param model_id [String, nil] model currently in use
      # @param effort [Symbol] one of EFFORT_LEVELS' keys
      # @param enabled [Boolean] whether to inject reasoning params at all
      # @param budget_tokens [Integer, nil] Anthropic extended-thinking budget
      def initialize(app, model_id: nil, effort: :medium, enabled: true, budget_tokens: nil)
        super(app)
        @model_id = model_id
        @effort = effort
        @enabled = enabled
        @budget_tokens = budget_tokens
        @message_models = [] # which model produced each assistant message
      end

      def call(env)
        inject_reasoning_params!(env) if @enabled

        response = @app.call(env)

        # POST: remember which model produced this response.
        @message_models << @model_id if response

        response
      end

      private

      # PRE: add provider-specific reasoning params to the outgoing request.
      def inject_reasoning_params!(env)
        env[:params] ||= {}

        case provider_type(env[:provider])
        when :anthropic
          # Older extended thinking API (claude-3.7-sonnet style). The newer
          # effort-based API (claude-4 style) is handled by the model itself,
          # so nothing is injected when no budget is configured.
          env[:params][:thinking] = { type: "enabled", budget_tokens: @budget_tokens } if @budget_tokens
        when :openai
          env[:params][:reasoning_effort] = EFFORT_LEVELS[@effort] || "medium"
        end
      end

      # Classify the provider by a case-insensitive class-name match.
      def provider_type(provider)
        case provider.class.name.to_s.downcase
        when /anthropic/ then :anthropic
        when /openai/ then :openai
        when /google|gemini/ then :google
        else :unknown
        end
      end
    end
  end
end