swarm_sdk 2.0.0.pre.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. checksums.yaml +7 -0
  2. data/lib/swarm_sdk/agent/builder.rb +333 -0
  3. data/lib/swarm_sdk/agent/chat/context_tracker.rb +271 -0
  4. data/lib/swarm_sdk/agent/chat/hook_integration.rb +372 -0
  5. data/lib/swarm_sdk/agent/chat/logging_helpers.rb +99 -0
  6. data/lib/swarm_sdk/agent/chat/system_reminder_injector.rb +114 -0
  7. data/lib/swarm_sdk/agent/chat.rb +779 -0
  8. data/lib/swarm_sdk/agent/context.rb +108 -0
  9. data/lib/swarm_sdk/agent/definition.rb +335 -0
  10. data/lib/swarm_sdk/configuration.rb +251 -0
  11. data/lib/swarm_sdk/context_compactor/metrics.rb +147 -0
  12. data/lib/swarm_sdk/context_compactor/token_counter.rb +106 -0
  13. data/lib/swarm_sdk/context_compactor.rb +340 -0
  14. data/lib/swarm_sdk/hooks/adapter.rb +359 -0
  15. data/lib/swarm_sdk/hooks/context.rb +163 -0
  16. data/lib/swarm_sdk/hooks/definition.rb +80 -0
  17. data/lib/swarm_sdk/hooks/error.rb +29 -0
  18. data/lib/swarm_sdk/hooks/executor.rb +146 -0
  19. data/lib/swarm_sdk/hooks/registry.rb +143 -0
  20. data/lib/swarm_sdk/hooks/result.rb +150 -0
  21. data/lib/swarm_sdk/hooks/shell_executor.rb +254 -0
  22. data/lib/swarm_sdk/hooks/tool_call.rb +35 -0
  23. data/lib/swarm_sdk/hooks/tool_result.rb +62 -0
  24. data/lib/swarm_sdk/log_collector.rb +83 -0
  25. data/lib/swarm_sdk/log_stream.rb +69 -0
  26. data/lib/swarm_sdk/markdown_parser.rb +46 -0
  27. data/lib/swarm_sdk/permissions/config.rb +239 -0
  28. data/lib/swarm_sdk/permissions/error_formatter.rb +121 -0
  29. data/lib/swarm_sdk/permissions/path_matcher.rb +35 -0
  30. data/lib/swarm_sdk/permissions/validator.rb +173 -0
  31. data/lib/swarm_sdk/permissions_builder.rb +122 -0
  32. data/lib/swarm_sdk/prompts/base_system_prompt.md.erb +237 -0
  33. data/lib/swarm_sdk/providers/openai_with_responses.rb +582 -0
  34. data/lib/swarm_sdk/result.rb +97 -0
  35. data/lib/swarm_sdk/swarm/agent_initializer.rb +224 -0
  36. data/lib/swarm_sdk/swarm/all_agents_builder.rb +62 -0
  37. data/lib/swarm_sdk/swarm/builder.rb +240 -0
  38. data/lib/swarm_sdk/swarm/mcp_configurator.rb +151 -0
  39. data/lib/swarm_sdk/swarm/tool_configurator.rb +267 -0
  40. data/lib/swarm_sdk/swarm.rb +837 -0
  41. data/lib/swarm_sdk/tools/bash.rb +274 -0
  42. data/lib/swarm_sdk/tools/delegate.rb +152 -0
  43. data/lib/swarm_sdk/tools/document_converters/base_converter.rb +83 -0
  44. data/lib/swarm_sdk/tools/document_converters/docx_converter.rb +99 -0
  45. data/lib/swarm_sdk/tools/document_converters/pdf_converter.rb +78 -0
  46. data/lib/swarm_sdk/tools/document_converters/xlsx_converter.rb +194 -0
  47. data/lib/swarm_sdk/tools/edit.rb +150 -0
  48. data/lib/swarm_sdk/tools/glob.rb +158 -0
  49. data/lib/swarm_sdk/tools/grep.rb +231 -0
  50. data/lib/swarm_sdk/tools/image_extractors/docx_image_extractor.rb +43 -0
  51. data/lib/swarm_sdk/tools/image_extractors/pdf_image_extractor.rb +163 -0
  52. data/lib/swarm_sdk/tools/image_formats/tiff_builder.rb +65 -0
  53. data/lib/swarm_sdk/tools/multi_edit.rb +232 -0
  54. data/lib/swarm_sdk/tools/path_resolver.rb +43 -0
  55. data/lib/swarm_sdk/tools/read.rb +251 -0
  56. data/lib/swarm_sdk/tools/registry.rb +73 -0
  57. data/lib/swarm_sdk/tools/scratchpad_list.rb +88 -0
  58. data/lib/swarm_sdk/tools/scratchpad_read.rb +59 -0
  59. data/lib/swarm_sdk/tools/scratchpad_write.rb +88 -0
  60. data/lib/swarm_sdk/tools/stores/read_tracker.rb +61 -0
  61. data/lib/swarm_sdk/tools/stores/scratchpad.rb +153 -0
  62. data/lib/swarm_sdk/tools/stores/todo_manager.rb +65 -0
  63. data/lib/swarm_sdk/tools/todo_write.rb +216 -0
  64. data/lib/swarm_sdk/tools/write.rb +117 -0
  65. data/lib/swarm_sdk/utils.rb +50 -0
  66. data/lib/swarm_sdk/version.rb +5 -0
  67. data/lib/swarm_sdk.rb +69 -0
  68. metadata +169 -0
data/lib/swarm_sdk/context_compactor/metrics.rb
@@ -0,0 +1,147 @@
+ # frozen_string_literal: true
+
+ module SwarmSDK
+   class ContextCompactor
+     # Metrics tracks compression statistics
+     #
+     # Provides detailed information about the compression operation:
+     # - Message counts (before/after)
+     # - Token counts (before/after)
+     # - Compression ratio
+     # - Time taken
+     # - Summary of changes
+     #
+     # ## Usage
+     #
+     #   metrics = agent.compact_context
+     #   puts metrics.summary
+     #   puts "Compressed from #{metrics.original_tokens} to #{metrics.compressed_tokens} tokens"
+     #   puts "Compression ratio: #{(metrics.compression_ratio * 100).round(1)}%"
+     #
+     class Metrics
+       attr_reader :original_messages, :compressed_messages, :time_taken
+
+       # Initialize metrics from compression operation
+       #
+       # @param original_messages [Array<RubyLLM::Message>] Messages before compression
+       # @param compressed_messages [Array<RubyLLM::Message>] Messages after compression
+       # @param time_taken [Float] Time taken in seconds
+       def initialize(original_messages:, compressed_messages:, time_taken:)
+         @original_messages = original_messages
+         @compressed_messages = compressed_messages
+         @time_taken = time_taken
+       end
+
+       # Number of messages before compression
+       #
+       # @return [Integer] Original message count
+       def original_message_count
+         @original_messages.size
+       end
+
+       # Number of messages after compression
+       #
+       # @return [Integer] Compressed message count
+       def compressed_message_count
+         @compressed_messages.size
+       end
+
+       # Number of messages removed
+       #
+       # @return [Integer] Messages removed
+       def messages_removed
+         original_message_count - compressed_message_count
+       end
+
+       # Number of checkpoint summary messages created
+       #
+       # @return [Integer] Checkpoint messages
+       def messages_summarized
+         @compressed_messages.count do |msg|
+           msg.role == :system && msg.content.to_s.include?("CONVERSATION CHECKPOINT")
+         end
+       end
+
+       # Estimated tokens before compression
+       #
+       # @return [Integer] Original token count
+       def original_tokens
+         @original_tokens ||= TokenCounter.estimate_messages(@original_messages)
+       end
+
+       # Estimated tokens after compression
+       #
+       # @return [Integer] Compressed token count
+       def compressed_tokens
+         @compressed_tokens ||= TokenCounter.estimate_messages(@compressed_messages)
+       end
+
+       # Number of tokens removed
+       #
+       # @return [Integer] Tokens removed
+       def tokens_removed
+         original_tokens - compressed_tokens
+       end
+
+       # Compression ratio (compressed / original)
+       #
+       # @return [Float] Ratio between 0.0 and 1.0
+       def compression_ratio
+         return 0.0 if original_tokens.zero?
+
+         compressed_tokens.to_f / original_tokens
+       end
+
+       # Compression factor (original / compressed)
+       #
+       # e.g., 5.0 means compressed to 1/5th of original size
+       #
+       # @return [Float] Compression factor
+       def compression_factor
+         return 0.0 if compressed_tokens.zero?
+
+         original_tokens.to_f / compressed_tokens
+       end
+
+       # Compression percentage
+       #
+       # @return [Float] Percentage of original size (0-100)
+       def compression_percentage
+         (compression_ratio * 100).round(2)
+       end
+
+       # Generate a human-readable summary
+       #
+       # @return [String] Summary text
+       def summary
+         <<~SUMMARY
+           Context Compression Results:
+           - Messages: #{original_message_count} → #{compressed_message_count} (-#{messages_removed})
+           - Estimated tokens: #{original_tokens} → #{compressed_tokens} (-#{tokens_removed})
+           - Compression ratio: #{compression_factor.round(1)}:1 (#{compression_percentage}%)
+           - Checkpoints created: #{messages_summarized}
+           - Time taken: #{time_taken.round(3)}s
+         SUMMARY
+       end
+
+       # Convert metrics to hash for logging
+       #
+       # @return [Hash] Metrics as hash
+       def to_h
+         {
+           original_message_count: original_message_count,
+           compressed_message_count: compressed_message_count,
+           messages_removed: messages_removed,
+           messages_summarized: messages_summarized,
+           original_tokens: original_tokens,
+           compressed_tokens: compressed_tokens,
+           tokens_removed: tokens_removed,
+           compression_ratio: compression_ratio.round(4),
+           compression_factor: compression_factor.round(2),
+           compression_percentage: compression_percentage,
+           time_taken: time_taken.round(3),
+         }
+       end
+     end
+   end
+ end
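
The ratio, factor, and percentage readings above are easy to conflate, so here is a quick worked example of the arithmetic in this class (illustrative numbers only, not taken from the gem):

    # With made-up token counts original_tokens = 10_000 and compressed_tokens = 1_500:
    #   compression_ratio        # => 0.15   (1_500.0 / 10_000)
    #   compression_factor       # => ~6.67  (10_000.0 / 1_500, i.e. compressed to roughly 1/7th)
    #   compression_percentage   # => 15.0   (ratio * 100, rounded to 2 decimal places)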
data/lib/swarm_sdk/context_compactor/token_counter.rb
@@ -0,0 +1,106 @@
+ # frozen_string_literal: true
+
+ module SwarmSDK
+   class ContextCompactor
+     # TokenCounter provides token estimation for messages
+     #
+     # This uses a simple heuristic approach:
+     # - ~4 characters per token for English prose
+     # - ~3.5 characters per token for code
+     #
+     # For production use with OpenAI models, consider using the tiktoken gem
+     # for accurate token counting. For Claude models, use Claude's token API.
+     #
+     # ## Usage
+     #
+     #   tokens = TokenCounter.estimate_message(message)
+     #   total_tokens = TokenCounter.estimate_messages(messages)
+     #
+     class TokenCounter
+       # Average characters per token for different content types
+       CHARS_PER_TOKEN_PROSE = 4.0
+       CHARS_PER_TOKEN_CODE = 3.5
+
+       class << self
+         # Estimate tokens for a single message
+         #
+         # @param message [RubyLLM::Message] Message to estimate
+         # @return [Integer] Estimated token count
+         def estimate_message(message)
+           case message.role
+           when :user, :assistant
+             estimate_content(message.content)
+           when :system
+             estimate_content(message.content)
+           when :tool
+             # Tool results typically have overhead
+             base_overhead = 50
+             content_tokens = estimate_content(message.content)
+             base_overhead + content_tokens
+           else
+             # Unknown message type
+             begin
+               estimate_content(message.content)
+             rescue
+               0
+             end
+           end
+         end
+
+         # Estimate tokens for multiple messages
+         #
+         # @param messages [Array<RubyLLM::Message>] Messages to estimate
+         # @return [Integer] Total estimated token count
+         def estimate_messages(messages)
+           messages.sum { |msg| estimate_message(msg) }
+         end
+
+         # Estimate tokens for content string
+         #
+         # Uses heuristic to detect code vs prose and adjust accordingly.
+         #
+         # @param content [String, RubyLLM::Content, nil] Content to estimate
+         # @return [Integer] Estimated token count
+         def estimate_content(content)
+           return 0 if content.nil?
+
+           # Handle RubyLLM::Content objects
+           text = if content.respond_to?(:to_s)
+             content.to_s
+           else
+             content
+           end
+
+           return 0 if text.empty?
+
+           # Detect if content is mostly code
+           code_ratio = detect_code_ratio(text)
+
+           # Choose characters per token based on content type
+           chars_per_token = if code_ratio > 0.1
+             CHARS_PER_TOKEN_CODE # Code
+           else
+             CHARS_PER_TOKEN_PROSE # Prose
+           end
+
+           (text.length / chars_per_token).ceil
+         end
+
+         private
+
+         # Detect ratio of code characters to total characters
+         #
+         # @param text [String] Text to analyze
+         # @return [Float] Ratio of code indicators (0.0 to 1.0)
+         def detect_code_ratio(text)
+           # Count code indicator characters
+           code_chars = text.scan(/[{}()\[\];]/).length
+
+           return 0.0 if text.empty?
+
+           code_chars.to_f / text.length
+         end
+       end
+     end
+   end
+ end
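
The characters-per-token heuristic above is easiest to see with a concrete input length (illustrative only, not output from the gem):

    # estimate_content on a 400-character string:
    #   treated as prose (code_ratio <= 0.1): (400 / 4.0).ceil   # => 100 tokens
    #   treated as code  (code_ratio >  0.1): (400 / 3.5).ceil   # => 115 tokens
    # For :tool messages, estimate_message adds the fixed base_overhead of 50 on top of this.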
data/lib/swarm_sdk/context_compactor.rb
@@ -0,0 +1,340 @@
+ # frozen_string_literal: true
+
+ module SwarmSDK
+   # ContextCompactor implements intelligent conversation history compression
+   #
+   # The Hybrid Production Strategy combines three compression techniques:
+   # 1. Tool result pruning - Aggressively truncate tool outputs (80% of tokens!)
+   # 2. Checkpoint creation - LLM-generated summaries of conversation chunks
+   # 3. Sliding window - Keep recent messages in full detail
+   #
+   # ## Usage
+   #
+   #   # From Agent::Chat
+   #   metrics = chat.compact_context
+   #
+   #   # With options
+   #   metrics = chat.compact_context(
+   #     tool_result_max_length: 500,
+   #     checkpoint_threshold: 50,
+   #     sliding_window_size: 20,
+   #     summarization_model: "claude-3-haiku-20240307"
+   #   )
+   #
+   # ## Metrics
+   #
+   # Returns a Metrics object with compression stats:
+   # - original_message_count / compressed_message_count
+   # - original_tokens / compressed_tokens
+   # - compression_ratio (e.g., 0.15 = 15% of original)
+   # - messages_removed / messages_summarized
+   # - time_taken
+   #
+   class ContextCompactor
+     # Default configuration
+     DEFAULT_OPTIONS = {
+       tool_result_max_length: 500, # Truncate tool results to N chars
+       checkpoint_threshold: 50, # Create checkpoint after N messages
+       sliding_window_size: 20, # Keep last N messages in full
+       summarization_model: "claude-3-haiku-20240307", # Fast model for summaries
+       preserve_system_messages: true, # Always keep system messages
+       preserve_error_messages: true, # Always keep error messages
+     }.freeze
+
+     # Initialize compactor for a chat instance
+     #
+     # @param chat [Agent::Chat] The chat instance to compact
+     # @param options [Hash] Configuration options (see DEFAULT_OPTIONS)
+     def initialize(chat, options = {})
+       @chat = chat
+       @options = DEFAULT_OPTIONS.merge(options)
+       @agent_name = chat.provider.respond_to?(:agent_name) ? chat.provider.agent_name : :unknown
+     end
+
+     # Compact the conversation history using hybrid production strategy
+     #
+     # Returns metrics about the compression operation.
+     #
+     # @return [ContextCompactor::Metrics] Compression metrics
+     def compact
+       start_time = Time.now
+       original_messages = @chat.messages.dup
+
+       # Emit compression_started event
+       LogStream.emit(
+         type: "compression_started",
+         agent: @agent_name,
+         message_count: original_messages.size,
+         estimated_tokens: TokenCounter.estimate_messages(original_messages),
+       )
+
+       # Step 1: Prune tool results
+       pruned = prune_tool_results(original_messages)
+
+       # Step 2: Create checkpoint if needed
+       checkpointed = create_checkpoint_if_needed(pruned)
+
+       # Step 3: Apply sliding window
+       final_messages = apply_sliding_window(checkpointed)
+
+       # Replace messages in chat
+       replace_messages(final_messages)
+
+       # Calculate metrics
+       time_taken = Time.now - start_time
+       metrics = ContextCompactor::Metrics.new(
+         original_messages: original_messages,
+         compressed_messages: final_messages,
+         time_taken: time_taken,
+       )
+
+       # Emit compression_completed event
+       LogStream.emit(
+         type: "compression_completed",
+         agent: @agent_name,
+         original_message_count: metrics.original_message_count,
+         compressed_message_count: metrics.compressed_message_count,
+         original_tokens: metrics.original_tokens,
+         compressed_tokens: metrics.compressed_tokens,
+         compression_ratio: metrics.compression_ratio,
+         messages_removed: metrics.messages_removed,
+         messages_summarized: metrics.messages_summarized,
+         time_taken: metrics.time_taken.round(3),
+       )
+
+       metrics
+     end
+
+     private
+
+     # Step 1: Prune tool results to reduce token count
+     #
+     # Tool results often contain 80%+ of conversation tokens.
+     # We truncate them aggressively while preserving errors.
+     #
+     # @param messages [Array<RubyLLM::Message>] Original messages
+     # @return [Array<RubyLLM::Message>] Messages with pruned tool results
+     def prune_tool_results(messages)
+       max_length = @options[:tool_result_max_length]
+
+       messages.map do |msg|
+         # Only prune tool result messages
+         next msg unless msg.role == :tool
+
+         # Preserve error messages
+         if @options[:preserve_error_messages] && msg.is_error
+           next msg
+         end
+
+         # Truncate long tool results
+         if msg.content.is_a?(String) && msg.content.length > max_length
+           truncated_content = msg.content[0...max_length] + "\n\n[... truncated by context compaction ...]"
+
+           # Create new message with truncated content
+           # We can't modify messages in place, so we create a new one
+           RubyLLM::Message.new(
+             role: :tool,
+             content: truncated_content,
+             tool_call_id: msg.tool_call_id,
+           )
+         else
+           msg
+         end
+       end
+     end
+
+     # Step 2: Create checkpoint if conversation is long enough
+     #
+     # Checkpoints are LLM-generated summaries that preserve context
+     # while drastically reducing token count. We keep recent messages
+     # in full detail and checkpoint older conversation.
+     #
+     # @param messages [Array<RubyLLM::Message>] Pruned messages
+     # @return [Array<RubyLLM::Message>] Messages with checkpoint
+     def create_checkpoint_if_needed(messages)
+       threshold = @options[:checkpoint_threshold]
+       window_size = @options[:sliding_window_size]
+
+       # Only checkpoint if we have enough messages
+       return messages if messages.size <= threshold
+
+       # Separate system messages, old messages, and recent messages
+       system_messages = messages.select { |m| m.role == :system }
+       non_system_messages = messages.reject { |m| m.role == :system }
+
+       # Keep recent messages, checkpoint the rest
+       recent_messages = non_system_messages.last(window_size)
+       old_messages = non_system_messages[0...-window_size]
+
+       # Create checkpoint summary of old messages
+       checkpoint_message = create_checkpoint_summary(old_messages)
+
+       # Reconstruct: system messages + checkpoint + recent messages
+       system_messages + [checkpoint_message] + recent_messages
+     end
+
+     # Step 3: Apply sliding window to keep conversation size bounded
+     #
+     # After checkpointing, we still apply a sliding window to ensure
+     # the conversation doesn't grow unbounded.
+     #
+     # @param messages [Array<RubyLLM::Message>] Checkpointed messages
+     # @return [Array<RubyLLM::Message>] Final messages
+     def apply_sliding_window(messages)
+       window_size = @options[:sliding_window_size]
+
+       # Separate system messages from others
+       system_messages = messages.select { |m| m.role == :system }
+       non_system_messages = messages.reject { |m| m.role == :system }
+
+       # Keep only the sliding window of non-system messages
+       recent_messages = non_system_messages.last(window_size)
+
+       # Always include system messages
+       system_messages + recent_messages
+     end
+
+     # Create a checkpoint summary using an LLM
+     #
+     # Uses a fast model (Haiku) to generate a concise summary of
+     # the conversation chunk that preserves critical context.
+     #
+     # @param messages [Array<RubyLLM::Message>] Messages to summarize
+     # @return [RubyLLM::Message] Checkpoint message
+     def create_checkpoint_summary(messages)
+       # Extract key information for summarization
+       user_messages = messages.select { |m| m.role == :user }.map(&:content).compact
+       assistant_messages = messages.select { |m| m.role == :assistant }.map(&:content).compact
+       tool_calls = messages.select { |m| m.role == :assistant && m.tool_calls&.any? }
+
+       # Build summarization prompt
+       prompt = build_summarization_prompt(
+         user_messages: user_messages,
+         assistant_messages: assistant_messages,
+         tool_calls: tool_calls,
+         message_count: messages.size,
+       )
+
+       # Generate summary using fast model
+       summary = generate_summary(prompt)
+
+       # Create checkpoint message
+       checkpoint_content = <<~CHECKPOINT
+         [CONVERSATION CHECKPOINT - #{Time.now.utc.iso8601}]
+
+         #{summary}
+
+         --- Continuing conversation from this point ---
+       CHECKPOINT
+
+       RubyLLM::Message.new(
+         role: :system,
+         content: checkpoint_content,
+       )
+     end
+
+     # Build the summarization prompt for the LLM
+     #
+     # @param user_messages [Array<String>] User message contents
+     # @param assistant_messages [Array<String>] Assistant message contents
+     # @param tool_calls [Array<RubyLLM::Message>] Messages with tool calls
+     # @param message_count [Integer] Total messages being summarized
+     # @return [String] Summarization prompt
+     def build_summarization_prompt(user_messages:, assistant_messages:, tool_calls:, message_count:)
+       # Format tool calls for context
+       tools_used = tool_calls.flat_map do |msg|
+         msg.tool_calls.map { |_id, tc| tc.name }
+       end.uniq
+
+       # Get last few user messages for context
+       recent_user_messages = user_messages.last(5).join("\n---\n")
+
+       <<~PROMPT
+         You are a conversation summarization specialist. Create a concise summary of this conversation
+         that preserves all critical information needed for the assistant to continue working effectively.
+
+         CONVERSATION STATS:
+         - Total messages: #{message_count}
+         - User messages: #{user_messages.size}
+         - Assistant responses: #{assistant_messages.size}
+         - Tools used: #{tools_used.join(", ")}
+
+         RECENT USER REQUESTS (last 5):
+         #{recent_user_messages}
+
+         INSTRUCTIONS:
+         Create a structured summary with these sections:
+
+         ## Summary
+         Brief overview of what has been discussed and accomplished (2-3 sentences)
+
+         ## Key Facts Discovered
+         - List important facts, findings, or observations
+         - Include file paths, variable names, configurations discussed
+         - Note any errors or issues encountered
+
+         ## Decisions Made
+         - List key decisions or approaches agreed upon
+         - Include rationale if relevant
+
+         ## Current State
+         - What is the current state of the work?
+         - What files or systems have been modified?
+         - What is working / what needs work?
+
+         ## Tools & Actions Completed
+         - Summarize major tool calls and their outcomes
+         - Focus on successful operations and their results
+
+         Be concise but comprehensive. Preserve all information the assistant will need to continue
+         the conversation seamlessly. Use bullet points for clarity.
+       PROMPT
+     end
+
+     # Generate summary using a fast LLM model
+     #
+     # @param prompt [String] Summarization prompt
+     # @return [String] Generated summary
+     def generate_summary(prompt)
+       # Create a temporary chat for summarization
+       summary_chat = RubyLLM::Chat.new(
+         model: @options[:summarization_model],
+         context: @chat.provider.client.context, # Use same context (API keys, etc.)
+       )
+
+       summary_chat.with_instructions("You are a precise conversation summarization assistant.")
+
+       response = summary_chat.ask(prompt)
+       response.content
+     rescue StandardError => e
+       # If summarization fails, create a simple fallback summary
+       RubyLLM.logger.warn("ContextCompactor: Summarization failed: #{e.message}")
+
+       <<~FALLBACK
+         ## Summary
+         Previous conversation involved multiple exchanges. Conversation compacted due to context limits.
+
+         ## Note
+         Summarization failed - continuing with reduced context. If critical information was lost,
+         please ask the user to provide it again.
+       FALLBACK
+     end
+
+     # Replace messages in the chat
+     #
+     # RubyLLM::Chat doesn't have a public API for replacing all messages,
+     # so we need to work with the internal messages array.
+     #
+     # @param new_messages [Array<RubyLLM::Message>] New message array
+     # @return [void]
+     def replace_messages(new_messages)
+       # Clear existing messages
+       @chat.messages.clear
+
+       # Add new messages
+       new_messages.each do |msg|
+         @chat.messages << msg
+       end
+     end
+   end
+ end
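
For orientation, a minimal Ruby sketch of driving the compactor directly, assuming `chat` is an Agent::Chat instance wired to a provider as the initializer above expects; per the class documentation, the same flow is normally reached via `chat.compact_context`:

    compactor = SwarmSDK::ContextCompactor.new(chat, sliding_window_size: 30)
    metrics = compactor.compact    # prune tool results, checkpoint if needed, apply sliding window
    puts metrics.summary           # human-readable compression report
    LOGGER.info(metrics.to_h)      # structured form for log pipelines (LOGGER is hypothetical)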