swarm_sdk 2.0.0.pre.2
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/swarm_sdk/agent/builder.rb +333 -0
- data/lib/swarm_sdk/agent/chat/context_tracker.rb +271 -0
- data/lib/swarm_sdk/agent/chat/hook_integration.rb +372 -0
- data/lib/swarm_sdk/agent/chat/logging_helpers.rb +99 -0
- data/lib/swarm_sdk/agent/chat/system_reminder_injector.rb +114 -0
- data/lib/swarm_sdk/agent/chat.rb +779 -0
- data/lib/swarm_sdk/agent/context.rb +108 -0
- data/lib/swarm_sdk/agent/definition.rb +335 -0
- data/lib/swarm_sdk/configuration.rb +251 -0
- data/lib/swarm_sdk/context_compactor/metrics.rb +147 -0
- data/lib/swarm_sdk/context_compactor/token_counter.rb +106 -0
- data/lib/swarm_sdk/context_compactor.rb +340 -0
- data/lib/swarm_sdk/hooks/adapter.rb +359 -0
- data/lib/swarm_sdk/hooks/context.rb +163 -0
- data/lib/swarm_sdk/hooks/definition.rb +80 -0
- data/lib/swarm_sdk/hooks/error.rb +29 -0
- data/lib/swarm_sdk/hooks/executor.rb +146 -0
- data/lib/swarm_sdk/hooks/registry.rb +143 -0
- data/lib/swarm_sdk/hooks/result.rb +150 -0
- data/lib/swarm_sdk/hooks/shell_executor.rb +254 -0
- data/lib/swarm_sdk/hooks/tool_call.rb +35 -0
- data/lib/swarm_sdk/hooks/tool_result.rb +62 -0
- data/lib/swarm_sdk/log_collector.rb +83 -0
- data/lib/swarm_sdk/log_stream.rb +69 -0
- data/lib/swarm_sdk/markdown_parser.rb +46 -0
- data/lib/swarm_sdk/permissions/config.rb +239 -0
- data/lib/swarm_sdk/permissions/error_formatter.rb +121 -0
- data/lib/swarm_sdk/permissions/path_matcher.rb +35 -0
- data/lib/swarm_sdk/permissions/validator.rb +173 -0
- data/lib/swarm_sdk/permissions_builder.rb +122 -0
- data/lib/swarm_sdk/prompts/base_system_prompt.md.erb +237 -0
- data/lib/swarm_sdk/providers/openai_with_responses.rb +582 -0
- data/lib/swarm_sdk/result.rb +97 -0
- data/lib/swarm_sdk/swarm/agent_initializer.rb +224 -0
- data/lib/swarm_sdk/swarm/all_agents_builder.rb +62 -0
- data/lib/swarm_sdk/swarm/builder.rb +240 -0
- data/lib/swarm_sdk/swarm/mcp_configurator.rb +151 -0
- data/lib/swarm_sdk/swarm/tool_configurator.rb +267 -0
- data/lib/swarm_sdk/swarm.rb +837 -0
- data/lib/swarm_sdk/tools/bash.rb +274 -0
- data/lib/swarm_sdk/tools/delegate.rb +152 -0
- data/lib/swarm_sdk/tools/document_converters/base_converter.rb +83 -0
- data/lib/swarm_sdk/tools/document_converters/docx_converter.rb +99 -0
- data/lib/swarm_sdk/tools/document_converters/pdf_converter.rb +78 -0
- data/lib/swarm_sdk/tools/document_converters/xlsx_converter.rb +194 -0
- data/lib/swarm_sdk/tools/edit.rb +150 -0
- data/lib/swarm_sdk/tools/glob.rb +158 -0
- data/lib/swarm_sdk/tools/grep.rb +231 -0
- data/lib/swarm_sdk/tools/image_extractors/docx_image_extractor.rb +43 -0
- data/lib/swarm_sdk/tools/image_extractors/pdf_image_extractor.rb +163 -0
- data/lib/swarm_sdk/tools/image_formats/tiff_builder.rb +65 -0
- data/lib/swarm_sdk/tools/multi_edit.rb +232 -0
- data/lib/swarm_sdk/tools/path_resolver.rb +43 -0
- data/lib/swarm_sdk/tools/read.rb +251 -0
- data/lib/swarm_sdk/tools/registry.rb +73 -0
- data/lib/swarm_sdk/tools/scratchpad_list.rb +88 -0
- data/lib/swarm_sdk/tools/scratchpad_read.rb +59 -0
- data/lib/swarm_sdk/tools/scratchpad_write.rb +88 -0
- data/lib/swarm_sdk/tools/stores/read_tracker.rb +61 -0
- data/lib/swarm_sdk/tools/stores/scratchpad.rb +153 -0
- data/lib/swarm_sdk/tools/stores/todo_manager.rb +65 -0
- data/lib/swarm_sdk/tools/todo_write.rb +216 -0
- data/lib/swarm_sdk/tools/write.rb +117 -0
- data/lib/swarm_sdk/utils.rb +50 -0
- data/lib/swarm_sdk/version.rb +5 -0
- data/lib/swarm_sdk.rb +69 -0
- metadata +169 -0
data/lib/swarm_sdk/context_compactor/metrics.rb
@@ -0,0 +1,147 @@

# frozen_string_literal: true

module SwarmSDK
  class ContextCompactor
    # Metrics tracks compression statistics
    #
    # Provides detailed information about the compression operation:
    # - Message counts (before/after)
    # - Token counts (before/after)
    # - Compression ratio
    # - Time taken
    # - Summary of changes
    #
    # ## Usage
    #
    #   metrics = agent.compact_context
    #   puts metrics.summary
    #   puts "Compressed from #{metrics.original_tokens} to #{metrics.compressed_tokens} tokens"
    #   puts "Compression ratio: #{(metrics.compression_ratio * 100).round(1)}%"
    #
    class Metrics
      attr_reader :original_messages, :compressed_messages, :time_taken

      # Initialize metrics from compression operation
      #
      # @param original_messages [Array<RubyLLM::Message>] Messages before compression
      # @param compressed_messages [Array<RubyLLM::Message>] Messages after compression
      # @param time_taken [Float] Time taken in seconds
      def initialize(original_messages:, compressed_messages:, time_taken:)
        @original_messages = original_messages
        @compressed_messages = compressed_messages
        @time_taken = time_taken
      end

      # Number of messages before compression
      #
      # @return [Integer] Original message count
      def original_message_count
        @original_messages.size
      end

      # Number of messages after compression
      #
      # @return [Integer] Compressed message count
      def compressed_message_count
        @compressed_messages.size
      end

      # Number of messages removed
      #
      # @return [Integer] Messages removed
      def messages_removed
        original_message_count - compressed_message_count
      end

      # Number of checkpoint summary messages created
      #
      # @return [Integer] Checkpoint messages
      def messages_summarized
        @compressed_messages.count do |msg|
          msg.role == :system && msg.content.to_s.include?("CONVERSATION CHECKPOINT")
        end
      end

      # Estimated tokens before compression
      #
      # @return [Integer] Original token count
      def original_tokens
        @original_tokens ||= TokenCounter.estimate_messages(@original_messages)
      end

      # Estimated tokens after compression
      #
      # @return [Integer] Compressed token count
      def compressed_tokens
        @compressed_tokens ||= TokenCounter.estimate_messages(@compressed_messages)
      end

      # Number of tokens removed
      #
      # @return [Integer] Tokens removed
      def tokens_removed
        original_tokens - compressed_tokens
      end

      # Compression ratio (compressed / original)
      #
      # @return [Float] Ratio between 0.0 and 1.0
      def compression_ratio
        return 0.0 if original_tokens.zero?

        compressed_tokens.to_f / original_tokens
      end

      # Compression factor (original / compressed)
      #
      # e.g., 5.0 means compressed to 1/5th of original size
      #
      # @return [Float] Compression factor
      def compression_factor
        return 0.0 if compressed_tokens.zero?

        original_tokens.to_f / compressed_tokens
      end

      # Compression percentage
      #
      # @return [Float] Percentage of original size (0-100)
      def compression_percentage
        (compression_ratio * 100).round(2)
      end

      # Generate a human-readable summary
      #
      # @return [String] Summary text
      def summary
        <<~SUMMARY
          Context Compression Results:
          - Messages: #{original_message_count} → #{compressed_message_count} (-#{messages_removed})
          - Estimated tokens: #{original_tokens} → #{compressed_tokens} (-#{tokens_removed})
          - Compression ratio: #{compression_factor.round(1)}:1 (#{compression_percentage}%)
          - Checkpoints created: #{messages_summarized}
          - Time taken: #{time_taken.round(3)}s
        SUMMARY
      end

      # Convert metrics to hash for logging
      #
      # @return [Hash] Metrics as hash
      def to_h
        {
          original_message_count: original_message_count,
          compressed_message_count: compressed_message_count,
          messages_removed: messages_removed,
          messages_summarized: messages_summarized,
          original_tokens: original_tokens,
          compressed_tokens: compressed_tokens,
          tokens_removed: tokens_removed,
          compression_ratio: compression_ratio.round(4),
          compression_factor: compression_factor.round(2),
          compression_percentage: compression_percentage,
          time_taken: time_taken.round(3),
        }
      end
    end
  end
end
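To make the reporting surface above concrete, here is a minimal sketch of building a Metrics object by hand and reading it back. The Msg struct is a stand-in for RubyLLM::Message (Metrics and TokenCounter only read role and content here); in normal use the SDK builds this object for you via chat.compact_context.

require "swarm_sdk"

# Stand-in for RubyLLM::Message; only role and content are needed by Metrics here.
Msg = Struct.new(:role, :content, keyword_init: true)

original = [
  Msg.new(role: :user,      content: "Please inspect the config loader."),
  Msg.new(role: :assistant, content: "Reading the loader now."),
  Msg.new(role: :tool,      content: "x" * 4_000), # a large tool result
]
compressed = [
  Msg.new(role: :system, content: "[CONVERSATION CHECKPOINT] summary of earlier work"),
  Msg.new(role: :user,   content: "Please inspect the config loader."),
]

metrics = SwarmSDK::ContextCompactor::Metrics.new(
  original_messages: original,
  compressed_messages: compressed,
  time_taken: 0.42,
)

puts metrics.summary      # human-readable block, as built in #summary above
puts metrics.to_h.inspect # structured form, convenient for log lines

The summary reports one checkpoint because the compressed history contains a system message carrying the "CONVERSATION CHECKPOINT" marker, which is exactly how messages_summarized counts checkpoints.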
data/lib/swarm_sdk/context_compactor/token_counter.rb
@@ -0,0 +1,106 @@

# frozen_string_literal: true

module SwarmSDK
  class ContextCompactor
    # TokenCounter provides token estimation for messages
    #
    # This uses a simple heuristic approach:
    # - ~4 characters per token for English prose
    # - ~3.5 characters per token for code
    #
    # For production use with OpenAI models, consider using the tiktoken gem
    # for accurate token counting. For Claude models, use Claude's token API.
    #
    # ## Usage
    #
    #   tokens = TokenCounter.estimate_message(message)
    #   total_tokens = TokenCounter.estimate_messages(messages)
    #
    class TokenCounter
      # Average characters per token for different content types
      CHARS_PER_TOKEN_PROSE = 4.0
      CHARS_PER_TOKEN_CODE = 3.5

      class << self
        # Estimate tokens for a single message
        #
        # @param message [RubyLLM::Message] Message to estimate
        # @return [Integer] Estimated token count
        def estimate_message(message)
          case message.role
          when :user, :assistant
            estimate_content(message.content)
          when :system
            estimate_content(message.content)
          when :tool
            # Tool results typically have overhead
            base_overhead = 50
            content_tokens = estimate_content(message.content)
            base_overhead + content_tokens
          else
            # Unknown message type
            begin
              estimate_content(message.content)
            rescue
              0
            end
          end
        end

        # Estimate tokens for multiple messages
        #
        # @param messages [Array<RubyLLM::Message>] Messages to estimate
        # @return [Integer] Total estimated token count
        def estimate_messages(messages)
          messages.sum { |msg| estimate_message(msg) }
        end

        # Estimate tokens for content string
        #
        # Uses heuristic to detect code vs prose and adjust accordingly.
        #
        # @param content [String, RubyLLM::Content, nil] Content to estimate
        # @return [Integer] Estimated token count
        def estimate_content(content)
          return 0 if content.nil?

          # Handle RubyLLM::Content objects
          text = if content.respond_to?(:to_s)
            content.to_s
          else
            content
          end

          return 0 if text.empty?

          # Detect if content is mostly code
          code_ratio = detect_code_ratio(text)

          # Choose characters per token based on content type
          chars_per_token = if code_ratio > 0.1
            CHARS_PER_TOKEN_CODE # Code
          else
            CHARS_PER_TOKEN_PROSE # Prose
          end

          (text.length / chars_per_token).ceil
        end

        private

        # Detect ratio of code characters to total characters
        #
        # @param text [String] Text to analyze
        # @return [Float] Ratio of code indicators (0.0 to 1.0)
        def detect_code_ratio(text)
          # Count code indicator characters
          code_chars = text.scan(/[{}()\[\];]/).length

          return 0.0 if text.empty?

          code_chars.to_f / text.length
        end
      end
    end
  end
end
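A quick worked example of the heuristic, using only the constants defined above (4.0 characters per token for prose, 3.5 once more than 10% of the characters are code punctuation):

require "swarm_sdk"

counter = SwarmSDK::ContextCompactor::TokenCounter

# A 45-character sentence repeated 10 times = 450 characters with no {}()[]; punctuation,
# so the prose rate applies: ceil(450 / 4.0) => 113 estimated tokens.
prose = "The quick brown fox jumps over the lazy dog. " * 10
puts counter.estimate_content(prose)

# Brace- and paren-heavy text pushes the code ratio past 0.1, so ceil(length / 3.5) applies.
code = "def add(a, b); { sum: (a + b) }; end\n" * 12
puts counter.estimate_content(code)

These are deliberately rough numbers; as the class comment notes, tiktoken (OpenAI) or the provider's token-counting API (Claude) is the accurate option when the estimate matters.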
data/lib/swarm_sdk/context_compactor.rb
@@ -0,0 +1,340 @@

# frozen_string_literal: true

module SwarmSDK
  # ContextCompactor implements intelligent conversation history compression
  #
  # The Hybrid Production Strategy combines three compression techniques:
  # 1. Tool result pruning - Aggressively truncate tool outputs (80% of tokens!)
  # 2. Checkpoint creation - LLM-generated summaries of conversation chunks
  # 3. Sliding window - Keep recent messages in full detail
  #
  # ## Usage
  #
  #   # From Agent::Chat
  #   metrics = chat.compact_context
  #
  #   # With options
  #   metrics = chat.compact_context(
  #     tool_result_max_length: 500,
  #     checkpoint_threshold: 50,
  #     sliding_window_size: 20,
  #     summarization_model: "claude-3-haiku-20240307"
  #   )
  #
  # ## Metrics
  #
  # Returns a Metrics object with compression stats:
  # - original_message_count / compressed_message_count
  # - original_tokens / compressed_tokens
  # - compression_ratio (e.g., 0.15 = 15% of original)
  # - messages_removed / messages_summarized
  # - time_taken
  #
  class ContextCompactor
    # Default configuration
    DEFAULT_OPTIONS = {
      tool_result_max_length: 500,                    # Truncate tool results to N chars
      checkpoint_threshold: 50,                       # Create checkpoint after N messages
      sliding_window_size: 20,                        # Keep last N messages in full
      summarization_model: "claude-3-haiku-20240307", # Fast model for summaries
      preserve_system_messages: true,                 # Always keep system messages
      preserve_error_messages: true,                  # Always keep error messages
    }.freeze

    # Initialize compactor for a chat instance
    #
    # @param chat [Agent::Chat] The chat instance to compact
    # @param options [Hash] Configuration options (see DEFAULT_OPTIONS)
    def initialize(chat, options = {})
      @chat = chat
      @options = DEFAULT_OPTIONS.merge(options)
      @agent_name = chat.provider.respond_to?(:agent_name) ? chat.provider.agent_name : :unknown
    end

    # Compact the conversation history using hybrid production strategy
    #
    # Returns metrics about the compression operation.
    #
    # @return [ContextCompactor::Metrics] Compression metrics
    def compact
      start_time = Time.now
      original_messages = @chat.messages.dup

      # Emit compression_started event
      LogStream.emit(
        type: "compression_started",
        agent: @agent_name,
        message_count: original_messages.size,
        estimated_tokens: TokenCounter.estimate_messages(original_messages),
      )

      # Step 1: Prune tool results
      pruned = prune_tool_results(original_messages)

      # Step 2: Create checkpoint if needed
      checkpointed = create_checkpoint_if_needed(pruned)

      # Step 3: Apply sliding window
      final_messages = apply_sliding_window(checkpointed)

      # Replace messages in chat
      replace_messages(final_messages)

      # Calculate metrics
      time_taken = Time.now - start_time
      metrics = ContextCompactor::Metrics.new(
        original_messages: original_messages,
        compressed_messages: final_messages,
        time_taken: time_taken,
      )

      # Emit compression_completed event
      LogStream.emit(
        type: "compression_completed",
        agent: @agent_name,
        original_message_count: metrics.original_message_count,
        compressed_message_count: metrics.compressed_message_count,
        original_tokens: metrics.original_tokens,
        compressed_tokens: metrics.compressed_tokens,
        compression_ratio: metrics.compression_ratio,
        messages_removed: metrics.messages_removed,
        messages_summarized: metrics.messages_summarized,
        time_taken: metrics.time_taken.round(3),
      )

      metrics
    end

    private

    # Step 1: Prune tool results to reduce token count
    #
    # Tool results often contain 80%+ of conversation tokens.
    # We truncate them aggressively while preserving errors.
    #
    # @param messages [Array<RubyLLM::Message>] Original messages
    # @return [Array<RubyLLM::Message>] Messages with pruned tool results
    def prune_tool_results(messages)
      max_length = @options[:tool_result_max_length]

      messages.map do |msg|
        # Only prune tool result messages
        next msg unless msg.role == :tool

        # Preserve error messages
        if @options[:preserve_error_messages] && msg.is_error
          next msg
        end

        # Truncate long tool results
        if msg.content.is_a?(String) && msg.content.length > max_length
          truncated_content = msg.content[0...max_length] + "\n\n[... truncated by context compaction ...]"

          # Create new message with truncated content
          # We can't modify messages in place, so we create a new one
          RubyLLM::Message.new(
            role: :tool,
            content: truncated_content,
            tool_call_id: msg.tool_call_id,
          )
        else
          msg
        end
      end
    end

    # Step 2: Create checkpoint if conversation is long enough
    #
    # Checkpoints are LLM-generated summaries that preserve context
    # while drastically reducing token count. We keep recent messages
    # in full detail and checkpoint older conversation.
    #
    # @param messages [Array<RubyLLM::Message>] Pruned messages
    # @return [Array<RubyLLM::Message>] Messages with checkpoint
    def create_checkpoint_if_needed(messages)
      threshold = @options[:checkpoint_threshold]
      window_size = @options[:sliding_window_size]

      # Only checkpoint if we have enough messages
      return messages if messages.size <= threshold

      # Separate system messages, old messages, and recent messages
      system_messages = messages.select { |m| m.role == :system }
      non_system_messages = messages.reject { |m| m.role == :system }

      # Keep recent messages, checkpoint the rest
      recent_messages = non_system_messages.last(window_size)
      old_messages = non_system_messages[0...-window_size]

      # Create checkpoint summary of old messages
      checkpoint_message = create_checkpoint_summary(old_messages)

      # Reconstruct: system messages + checkpoint + recent messages
      system_messages + [checkpoint_message] + recent_messages
    end

    # Step 3: Apply sliding window to keep conversation size bounded
    #
    # After checkpointing, we still apply a sliding window to ensure
    # the conversation doesn't grow unbounded.
    #
    # @param messages [Array<RubyLLM::Message>] Checkpointed messages
    # @return [Array<RubyLLM::Message>] Final messages
    def apply_sliding_window(messages)
      window_size = @options[:sliding_window_size]

      # Separate system messages from others
      system_messages = messages.select { |m| m.role == :system }
      non_system_messages = messages.reject { |m| m.role == :system }

      # Keep only the sliding window of non-system messages
      recent_messages = non_system_messages.last(window_size)

      # Always include system messages
      system_messages + recent_messages
    end

    # Create a checkpoint summary using an LLM
    #
    # Uses a fast model (Haiku) to generate a concise summary of
    # the conversation chunk that preserves critical context.
    #
    # @param messages [Array<RubyLLM::Message>] Messages to summarize
    # @return [RubyLLM::Message] Checkpoint message
    def create_checkpoint_summary(messages)
      # Extract key information for summarization
      user_messages = messages.select { |m| m.role == :user }.map(&:content).compact
      assistant_messages = messages.select { |m| m.role == :assistant }.map(&:content).compact
      tool_calls = messages.select { |m| m.role == :assistant && m.tool_calls&.any? }

      # Build summarization prompt
      prompt = build_summarization_prompt(
        user_messages: user_messages,
        assistant_messages: assistant_messages,
        tool_calls: tool_calls,
        message_count: messages.size,
      )

      # Generate summary using fast model
      summary = generate_summary(prompt)

      # Create checkpoint message
      checkpoint_content = <<~CHECKPOINT
        [CONVERSATION CHECKPOINT - #{Time.now.utc.iso8601}]

        #{summary}

        --- Continuing conversation from this point ---
      CHECKPOINT

      RubyLLM::Message.new(
        role: :system,
        content: checkpoint_content,
      )
    end

    # Build the summarization prompt for the LLM
    #
    # @param user_messages [Array<String>] User message contents
    # @param assistant_messages [Array<String>] Assistant message contents
    # @param tool_calls [Array<RubyLLM::Message>] Messages with tool calls
    # @param message_count [Integer] Total messages being summarized
    # @return [String] Summarization prompt
    def build_summarization_prompt(user_messages:, assistant_messages:, tool_calls:, message_count:)
      # Format tool calls for context
      tools_used = tool_calls.flat_map do |msg|
        msg.tool_calls.map { |_id, tc| tc.name }
      end.uniq

      # Get last few user messages for context
      recent_user_messages = user_messages.last(5).join("\n---\n")

      <<~PROMPT
        You are a conversation summarization specialist. Create a concise summary of this conversation
        that preserves all critical information needed for the assistant to continue working effectively.

        CONVERSATION STATS:
        - Total messages: #{message_count}
        - User messages: #{user_messages.size}
        - Assistant responses: #{assistant_messages.size}
        - Tools used: #{tools_used.join(", ")}

        RECENT USER REQUESTS (last 5):
        #{recent_user_messages}

        INSTRUCTIONS:
        Create a structured summary with these sections:

        ## Summary
        Brief overview of what has been discussed and accomplished (2-3 sentences)

        ## Key Facts Discovered
        - List important facts, findings, or observations
        - Include file paths, variable names, configurations discussed
        - Note any errors or issues encountered

        ## Decisions Made
        - List key decisions or approaches agreed upon
        - Include rationale if relevant

        ## Current State
        - What is the current state of the work?
        - What files or systems have been modified?
        - What is working / what needs work?

        ## Tools & Actions Completed
        - Summarize major tool calls and their outcomes
        - Focus on successful operations and their results

        Be concise but comprehensive. Preserve all information the assistant will need to continue
        the conversation seamlessly. Use bullet points for clarity.
      PROMPT
    end

    # Generate summary using a fast LLM model
    #
    # @param prompt [String] Summarization prompt
    # @return [String] Generated summary
    def generate_summary(prompt)
      # Create a temporary chat for summarization
      summary_chat = RubyLLM::Chat.new(
        model: @options[:summarization_model],
        context: @chat.provider.client.context, # Use same context (API keys, etc.)
      )

      summary_chat.with_instructions("You are a precise conversation summarization assistant.")

      response = summary_chat.ask(prompt)
      response.content
    rescue StandardError => e
      # If summarization fails, create a simple fallback summary
      RubyLLM.logger.warn("ContextCompactor: Summarization failed: #{e.message}")

      <<~FALLBACK
        ## Summary
        Previous conversation involved multiple exchanges. Conversation compacted due to context limits.

        ## Note
        Summarization failed - continuing with reduced context. If critical information was lost,
        please ask the user to provide it again.
      FALLBACK
    end

    # Replace messages in the chat
    #
    # RubyLLM::Chat doesn't have a public API for replacing all messages,
    # so we need to work with the internal messages array.
    #
    # @param new_messages [Array<RubyLLM::Message>] New message array
    # @return [void]
    def replace_messages(new_messages)
      # Clear existing messages
      @chat.messages.clear

      # Add new messages
      new_messages.each do |msg|
        @chat.messages << msg
      end
    end
  end
end
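Putting the three steps together, a driver might look like the sketch below. chat.compact_context (shown in the class comment) is the normal entry point; constructing the compactor directly, as here, simply exposes the same options. build_chat is a placeholder for however your application obtains a configured SwarmSDK::Agent::Chat with some history.

require "swarm_sdk"

chat = build_chat # placeholder: any configured SwarmSDK::Agent::Chat with message history

compactor = SwarmSDK::ContextCompactor.new(
  chat,
  tool_result_max_length: 300, # truncate tool output harder than the default 500
  sliding_window_size: 10,     # keep only the last 10 non-system messages verbatim
)

metrics = compactor.compact
puts metrics.summary

Because compact emits compression_started and compression_completed events through LogStream.emit, the same before/after message and token counts are also visible to whatever log consumer the swarm has wired up, not just through the returned Metrics object.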