swarm_sdk 2.0.0.pre.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. checksums.yaml +7 -0
  2. data/lib/swarm_sdk/agent/builder.rb +333 -0
  3. data/lib/swarm_sdk/agent/chat/context_tracker.rb +271 -0
  4. data/lib/swarm_sdk/agent/chat/hook_integration.rb +372 -0
  5. data/lib/swarm_sdk/agent/chat/logging_helpers.rb +99 -0
  6. data/lib/swarm_sdk/agent/chat/system_reminder_injector.rb +114 -0
  7. data/lib/swarm_sdk/agent/chat.rb +779 -0
  8. data/lib/swarm_sdk/agent/context.rb +108 -0
  9. data/lib/swarm_sdk/agent/definition.rb +335 -0
  10. data/lib/swarm_sdk/configuration.rb +251 -0
  11. data/lib/swarm_sdk/context_compactor/metrics.rb +147 -0
  12. data/lib/swarm_sdk/context_compactor/token_counter.rb +106 -0
  13. data/lib/swarm_sdk/context_compactor.rb +340 -0
  14. data/lib/swarm_sdk/hooks/adapter.rb +359 -0
  15. data/lib/swarm_sdk/hooks/context.rb +163 -0
  16. data/lib/swarm_sdk/hooks/definition.rb +80 -0
  17. data/lib/swarm_sdk/hooks/error.rb +29 -0
  18. data/lib/swarm_sdk/hooks/executor.rb +146 -0
  19. data/lib/swarm_sdk/hooks/registry.rb +143 -0
  20. data/lib/swarm_sdk/hooks/result.rb +150 -0
  21. data/lib/swarm_sdk/hooks/shell_executor.rb +254 -0
  22. data/lib/swarm_sdk/hooks/tool_call.rb +35 -0
  23. data/lib/swarm_sdk/hooks/tool_result.rb +62 -0
  24. data/lib/swarm_sdk/log_collector.rb +83 -0
  25. data/lib/swarm_sdk/log_stream.rb +69 -0
  26. data/lib/swarm_sdk/markdown_parser.rb +46 -0
  27. data/lib/swarm_sdk/permissions/config.rb +239 -0
  28. data/lib/swarm_sdk/permissions/error_formatter.rb +121 -0
  29. data/lib/swarm_sdk/permissions/path_matcher.rb +35 -0
  30. data/lib/swarm_sdk/permissions/validator.rb +173 -0
  31. data/lib/swarm_sdk/permissions_builder.rb +122 -0
  32. data/lib/swarm_sdk/prompts/base_system_prompt.md.erb +237 -0
  33. data/lib/swarm_sdk/providers/openai_with_responses.rb +582 -0
  34. data/lib/swarm_sdk/result.rb +97 -0
  35. data/lib/swarm_sdk/swarm/agent_initializer.rb +224 -0
  36. data/lib/swarm_sdk/swarm/all_agents_builder.rb +62 -0
  37. data/lib/swarm_sdk/swarm/builder.rb +240 -0
  38. data/lib/swarm_sdk/swarm/mcp_configurator.rb +151 -0
  39. data/lib/swarm_sdk/swarm/tool_configurator.rb +267 -0
  40. data/lib/swarm_sdk/swarm.rb +837 -0
  41. data/lib/swarm_sdk/tools/bash.rb +274 -0
  42. data/lib/swarm_sdk/tools/delegate.rb +152 -0
  43. data/lib/swarm_sdk/tools/document_converters/base_converter.rb +83 -0
  44. data/lib/swarm_sdk/tools/document_converters/docx_converter.rb +99 -0
  45. data/lib/swarm_sdk/tools/document_converters/pdf_converter.rb +78 -0
  46. data/lib/swarm_sdk/tools/document_converters/xlsx_converter.rb +194 -0
  47. data/lib/swarm_sdk/tools/edit.rb +150 -0
  48. data/lib/swarm_sdk/tools/glob.rb +158 -0
  49. data/lib/swarm_sdk/tools/grep.rb +231 -0
  50. data/lib/swarm_sdk/tools/image_extractors/docx_image_extractor.rb +43 -0
  51. data/lib/swarm_sdk/tools/image_extractors/pdf_image_extractor.rb +163 -0
  52. data/lib/swarm_sdk/tools/image_formats/tiff_builder.rb +65 -0
  53. data/lib/swarm_sdk/tools/multi_edit.rb +232 -0
  54. data/lib/swarm_sdk/tools/path_resolver.rb +43 -0
  55. data/lib/swarm_sdk/tools/read.rb +251 -0
  56. data/lib/swarm_sdk/tools/registry.rb +73 -0
  57. data/lib/swarm_sdk/tools/scratchpad_list.rb +88 -0
  58. data/lib/swarm_sdk/tools/scratchpad_read.rb +59 -0
  59. data/lib/swarm_sdk/tools/scratchpad_write.rb +88 -0
  60. data/lib/swarm_sdk/tools/stores/read_tracker.rb +61 -0
  61. data/lib/swarm_sdk/tools/stores/scratchpad.rb +153 -0
  62. data/lib/swarm_sdk/tools/stores/todo_manager.rb +65 -0
  63. data/lib/swarm_sdk/tools/todo_write.rb +216 -0
  64. data/lib/swarm_sdk/tools/write.rb +117 -0
  65. data/lib/swarm_sdk/utils.rb +50 -0
  66. data/lib/swarm_sdk/version.rb +5 -0
  67. data/lib/swarm_sdk.rb +69 -0
  68. metadata +169 -0
@@ -0,0 +1,274 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SwarmSDK
4
+ module Tools
5
+ # Bash tool for executing shell commands
6
+ #
7
+ # Executes commands in a persistent shell session with timeout support.
8
+ # Provides comprehensive guidance on proper usage patterns.
9
+ class Bash < RubyLLM::Tool
10
# Build a Bash tool bound to one working directory.
#
# @param directory [String] directory commands are run from; it is
#   expanded to an absolute path before being stored
def initialize(directory:)
  super()
  absolute_directory = File.expand_path(directory)
  @directory = absolute_directory
end
14
+
15
# Name under which this tool is exposed.
#
# @return [String] the literal tool name "Bash"
def name
  tool_label = "Bash"
  tool_label
end
18
+
19
+ description <<~DESC
20
+ Executes a given bash command in a persistent shell session with optional timeout, ensuring proper handling and security measures.
21
+
22
+ IMPORTANT: This tool is for terminal operations like git, npm, docker, etc. DO NOT use it for file operations (reading, writing, editing, searching, finding files) - use the specialized tools for this instead.
23
+
24
+ Before executing the command, please follow these steps:
25
+
26
+ 1. Directory Verification:
27
+ - If the command will create new directories or files, first use `ls` to verify the parent directory exists and is the correct location
28
+ - For example, before running "mkdir foo/bar", first use `ls foo` to check that "foo" exists and is the intended parent directory
29
+
30
+ 2. Command Execution:
31
+ - Always quote file paths that contain spaces with double quotes (e.g., cd "path with spaces/file.txt")
32
+ - Examples of proper quoting:
33
+ - cd "/Users/name/My Documents" (correct)
34
+ - cd /Users/name/My Documents (incorrect - will fail)
35
+ - python "/path/with spaces/script.py" (correct)
36
+ - python /path/with spaces/script.py (incorrect - will fail)
37
+ - After ensuring proper quoting, execute the command.
38
+ - Capture the output of the command.
39
+
40
+ Usage notes:
41
+ - The command argument is required.
42
+ - You can specify an optional timeout in milliseconds (up to 600000ms / 10 minutes). If not specified, commands will timeout after 120000ms (2 minutes).
43
+ - It is very helpful if you write a clear, concise description of what this command does in 5-10 words.
44
+ - If the output exceeds 30000 characters, output will be truncated before being returned to you.
45
+ - Avoid using Bash with the `find`, `grep`, `cat`, `head`, `tail`, `sed`, `awk`, or `echo` commands, unless explicitly instructed or when these commands are truly necessary for the task. Instead, always prefer using the dedicated tools for these commands:
46
+ - File search: Use Glob (NOT find or ls)
47
+ - Content search: Use Grep (NOT grep or rg)
48
+ - Read files: Use Read (NOT cat/head/tail)
49
+ - Edit files: Use Edit (NOT sed/awk)
50
+ - Write files: Use Write (NOT echo >/cat <<EOF)
51
+ - Communication: Output text directly (NOT echo/printf)
52
+ - When issuing multiple commands:
53
+ - If the commands are independent and can run in parallel, make multiple Bash tool calls in a single message. For example, if you need to run "git status" and "git diff", send a single message with two Bash tool calls in parallel.
54
+ - If the commands depend on each other and must run sequentially, use a single Bash call with '&&' to chain them together (e.g., `git add . && git commit -m "message" && git push`). For instance, if one operation must complete before another starts (like mkdir before cp, Write before Bash for git operations, or git add before git commit), run these operations sequentially instead.
55
+ - Use ';' only when you need to run commands sequentially but don't care if earlier commands fail
56
+ - DO NOT use newlines to separate commands (newlines are ok in quoted strings)
57
+ - Try to maintain your current working directory throughout the session by using absolute paths and avoiding usage of `cd`. You may use `cd` if the User explicitly requests it.
58
+ <good-example>
59
+ pytest /foo/bar/tests
60
+ </good-example>
61
+ <bad-example>
62
+ cd /foo/bar && pytest tests
63
+ </bad-example>
64
+ DESC
65
+
66
+ param :command,
67
+ type: "string",
68
+ desc: "The command to execute",
69
+ required: true
70
+
71
+ param :description,
72
+ type: "string",
73
+ desc: "Clear, concise description of what this command does in 5-10 words, in active voice. Examples:\nInput: ls\nOutput: List files in current directory\n\nInput: git status\nOutput: Show working tree status\n\nInput: npm install\nOutput: Install package dependencies\n\nInput: mkdir foo\nOutput: Create directory 'foo'",
74
+ required: false
75
+
76
+ param :timeout,
77
+ type: "number",
78
+ desc: "Optional timeout in milliseconds (max 600000)",
79
+ required: false
80
+
81
# Timeout used when the caller does not supply one (milliseconds).
DEFAULT_TIMEOUT_MS = 120_000 # 2 minutes
# Upper bound applied to caller-supplied timeouts (milliseconds).
MAX_TIMEOUT_MS = 600_000 # 10 minutes
# Formatted output longer than this is truncated.
MAX_OUTPUT_LENGTH = 30_000 # characters

# Commands that are ALWAYS blocked for safety reasons
# These cannot be overridden by permissions configuration
ALWAYS_BLOCKED_COMMANDS = [
  # rm -rf / - delete root filesystem.
  # Also tolerates surrounding whitespace, the -fr flag order, a sudo
  # prefix and the "/*" spelling, so trivial variations cannot slip past.
  # ^ and $ are deliberate: any single line of a multi-line command that
  # matches is enough to block the whole command.
  %r{^\s*(?:sudo\s+)?rm\s+-(?:rf|fr)\s+/\*?\s*$},
].freeze
90
+
91
# Run a shell command from the agent's directory and return a text report.
#
# @param command [String] command line to execute (required, non-empty)
# @param description [String, nil] optional short description echoed in the report
# @param timeout [Numeric, nil] timeout in milliseconds; defaults to
#   DEFAULT_TIMEOUT_MS and is capped at MAX_TIMEOUT_MS
# @return [String] formatted command output, or an error message string
def execute(command:, description: nil, timeout: nil)
  # Validate inputs
  return validation_error("command is required") if command.nil? || command.empty?

  # Check against always-blocked commands
  blocked_pattern = ALWAYS_BLOCKED_COMMANDS.find { |pattern| pattern.match?(command) }
  return blocked_command_error(command, blocked_pattern) if blocked_pattern

  # A zero or negative timeout used to disable the limit altogether
  # (Timeout.timeout(0) means "no timeout"), so reject it up front.
  timeout_seconds = resolve_timeout_seconds(timeout)
  return validation_error("timeout must be a positive number of milliseconds") unless timeout_seconds

  stdout = +""
  stderr = +""
  exit_status = nil

  begin
    require "open3"
    require "timeout"

    stdout, stderr, exit_status = run_command(command, timeout_seconds)
  rescue Timeout::Error
    return format_timeout_error(command, timeout_seconds)
  rescue Errno::ENOENT => e
    return error("Command not found or executable not in PATH: #{e.message}")
  rescue Errno::EACCES
    return error("Permission denied: Cannot execute command '#{command}'")
  rescue StandardError => e
    return error("Failed to execute command: #{e.class.name} - #{e.message}")
  end

  # Build output
  output = format_command_output(command, description, stdout, stderr, exit_status)

  # Truncate if too long
  if output.length > MAX_OUTPUT_LENGTH
    truncated = output[0...MAX_OUTPUT_LENGTH]
    truncated += "\n\n<system-reminder>Output truncated at #{MAX_OUTPUT_LENGTH} characters. The full output was #{output.length} characters.</system-reminder>"
    output = truncated
  end

  # Add usage reminders for certain patterns
  add_usage_reminders(output, command)
rescue StandardError => e
  error("Unexpected error executing command: #{e.class.name} - #{e.message}")
end

# Convert the caller-supplied timeout (milliseconds) into seconds.
#
# @param timeout [Numeric, String, nil] requested timeout in milliseconds
# @return [Float, nil] seconds to wait, or nil when the value is not a
#   positive number
private def resolve_timeout_seconds(timeout)
  return DEFAULT_TIMEOUT_MS / 1000.0 if timeout.nil?

  timeout_ms = Float(timeout, exception: false)
  return unless timeout_ms&.positive?

  [timeout_ms, MAX_TIMEOUT_MS].min / 1000.0
end

# Spawn the command in @directory and collect its output within the deadline.
#
# The working directory is passed to the child through the `chdir:` spawn
# option, so the parent process's own cwd is never changed. stdout and
# stderr are drained on separate threads: reading one pipe to EOF before
# the other deadlocks as soon as the child fills the unread pipe.
#
# @param command [String] command line to run
# @param timeout_seconds [Float] maximum time to wait for exit and output
# @return [Array(String, String, Integer)] stdout, stderr and exit status
#   (exit status is nil when the process did not exit normally)
# @raise [Timeout::Error] when the deadline passes; the child is killed first
private def run_command(command, timeout_seconds)
  deadline = Process.clock_gettime(Process::CLOCK_MONOTONIC) + timeout_seconds

  Open3.popen3(command, chdir: @directory) do |stdin, out, err, wait_thr|
    stdin.close # Close stdin since we don't send input

    readers = [out, err].map do |io|
      Thread.new do
        Thread.current.report_on_exception = false
        io.read
      end
    end

    # Wait for process exit first, then for both pipes to reach EOF, all
    # against the same deadline.
    finished = [wait_thr, *readers].all? do |thread|
      remaining = deadline - Process.clock_gettime(Process::CLOCK_MONOTONIC)
      thread.join([remaining, 0].max)
    end

    unless finished
      # popen3 waits for the child when this block unwinds, so the child
      # must be killed here or the timeout would not actually return.
      terminate_command(wait_thr.pid)
      readers.each(&:kill)
      raise Timeout::Error, "command exceeded #{timeout_seconds} seconds"
    end

    stdout, stderr = readers.map { |reader| reader.value || "" }
    [stdout, stderr, wait_thr.value.exitstatus]
  end
end

# Forcefully stop a timed-out child, ignoring the race where it already exited.
#
# @param pid [Integer] process id of the spawned command
# @return [void]
private def terminate_command(pid)
  Process.kill("KILL", pid)
rescue Errno::ESRCH, Errno::EPERM
  nil
end
157
+
158
+ private
159
+
160
# Wrap an input-validation failure in the tool_use_error envelope.
#
# @param message [String] what was wrong with the input
# @return [String] the message wrapped in <tool_use_error> tags
def validation_error(message)
  opening = "<tool_use_error>InputValidationError: "
  closing = "</tool_use_error>"
  opening + message.to_s + closing
end
163
+
164
# Prefix a message with the generic error marker.
#
# @param message [String] human-readable failure description
# @return [String] the message prefixed with "Error: "
def error(message)
  prefix = "Error: "
  prefix + message.to_s
end
167
+
168
# Build the refusal text returned when a command matches an always-blocked pattern.
#
# @param command [String] the command that was rejected
# @param pattern [Regexp] the blocked pattern it matched; its source is shown
# @return [String] multi-line error text ending with a system reminder
def blocked_command_error(command, pattern)
  matched_source = pattern.source

  <<~ERROR
    Error: Command blocked for safety reasons.
    Command: #{command}
    Pattern: #{matched_source}

    <system-reminder>
    SECURITY BLOCK: This command is permanently blocked for safety reasons and cannot be executed.

    This is a built-in safety feature of the Bash tool that cannot be overridden by any configuration.
    The command matches a pattern that could cause catastrophic system damage.

    DO NOT attempt to:
    - Modify the command slightly to bypass this check
    - Ask the user to allow this command
    - Work around this restriction in any way

    If you need to perform a similar operation safely, consider:
    - Using a more specific path instead of system-wide operations
    - Using dedicated tools for file operations
    - Asking the user for guidance on a safer approach

    This is an UNRECOVERABLE error. You must inform the user that this command cannot be executed for safety reasons.
    </system-reminder>
  ERROR
end
194
+
195
# Build the message returned when a command runs past its time limit.
#
# @param command [String] the command that timed out
# @param timeout_seconds [Numeric] the limit that was exceeded, in seconds
# @return [String] multi-line error text with follow-up suggestions
def format_timeout_error(command, timeout_seconds)
  lines = [
    "Error: Command timed out after #{timeout_seconds} seconds.",
    "Command: #{command}",
    "",
    "<system-reminder>The command exceeded the timeout limit. Consider:",
    "1. Breaking the command into smaller steps",
    "2. Increasing the timeout parameter",
    "3. Running long-running commands in the background if supported",
    "</system-reminder>",
  ]

  # The trailing newline mirrors what a heredoc would produce.
  lines.join("\n") + "\n"
end
207
+
208
# Assemble the report for a finished command.
#
# @param command [String] the command that was run
# @param description [String, nil] optional description shown on the first line
# @param stdout [String, nil] captured standard output
# @param stderr [String, nil] captured standard error
# @param exit_status [Integer, nil] exit code; nil when the process did not
#   exit normally (for example, it was terminated by a signal)
# @return [String] report lines joined with newlines
def format_command_output(command, description, stdout, stderr, exit_status)
  parts = []

  # Add description if provided
  parts << "Running: #{description}" if description

  # Add command
  parts << "$ #{command}"
  parts << ""

  # Add exit status. A nil status used to render as a blank "Exit code:"
  # line, so spell that case out instead.
  exit_label = exit_status.nil? ? "unknown (process did not exit normally)" : exit_status
  parts << "Exit code: #{exit_label}"

  # Add stdout if present
  if stdout && !stdout.empty?
    parts << ""
    parts << "STDOUT:"
    parts << stdout.chomp
  end

  # Add stderr if present
  if stderr && !stderr.empty?
    parts << ""
    parts << "STDERR:"
    parts << stderr.chomp
  end

  # Add warning for abnormal termination or non-zero exit
  if exit_status.nil?
    parts << ""
    parts << "<system-reminder>Command did not exit normally (it may have been terminated by a signal), so no exit status is available. Check STDERR for error details.</system-reminder>"
  elsif exit_status != 0
    parts << ""
    parts << "<system-reminder>Command exited with non-zero status (#{exit_status}). Check STDERR for error details.</system-reminder>"
  end

  parts.join("\n")
end
243
+
244
# Append hints when the command looks like something a dedicated tool handles.
#
# Each check is a plain pattern match on the command text. When nothing
# matches, the output is returned unchanged.
#
# @param output [String] formatted command report
# @param command [String] the command that was run
# @return [String] the report, followed by a system reminder when any hint applies
def add_usage_reminders(output, command)
  checks = [
    [
      command.match?(/\b(cat|head|tail|less|more)\s+/),
      "You used a command to read a file. Consider using the Read tool instead for better formatting and error handling.",
    ],
    [
      command.match?(/\b(grep|rg|ag)\s+/),
      "You used grep/ripgrep to search files. Consider using the Grep tool instead for structured results.",
    ],
    [
      command.match?(/\b(find|locate)\s+/),
      "You used find to locate files. Consider using the Glob tool instead for pattern-based file matching.",
    ],
    [
      # sed/awk inside a pipeline is treated as filtering, not file editing
      command.match?(/\b(sed|awk)\s+/) && !command.include?("|"),
      "You used sed/awk for file editing. Consider using the Edit tool instead for safer, tracked file modifications.",
    ],
    [
      command.match?(/\becho\s+.*>\s*/) || command.match?(/\bcat\s*<</),
      "You used echo/cat with redirection to write a file. Consider using the Write tool instead for proper file creation.",
    ],
  ]

  hints = checks.select(&:first).map(&:last)
  return output if hints.empty?

  output + "\n\n<system-reminder>\n" + hints.join("\n\n") + "\n</system-reminder>"
end
272
+ end
273
+ end
274
+ end
@@ -0,0 +1,152 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SwarmSDK
4
+ module Tools
5
+ # Delegate tool for delegating tasks to other agents in the swarm
6
+ #
7
+ # Creates agent-specific delegation tools (e.g., DelegateTaskToBackend)
8
+ # that allow one agent to delegate work to another agent.
9
+ # Supports pre/post delegation hooks for customization.
10
+ class Delegate < RubyLLM::Tool
11
+ attr_reader :delegate_name, :delegate_target, :tool_name
12
+
13
+ # Initialize a delegation tool
14
+ #
15
+ # @param delegate_name [String] Name of the delegate agent (e.g., "backend")
16
+ # @param delegate_description [String] Description of the delegate agent
17
+ # @param delegate_chat [AgentChat] The chat instance for the delegate agent
18
+ # @param agent_name [Symbol, String] Name of the agent using this tool
19
+ # @param swarm [Swarm] The swarm instance
20
+ # @param hook_registry [Hooks::Registry] Registry for callbacks
21
# Store the delegation wiring and derive the tool's public name.
#
# @param delegate_name [String] name of the agent that receives delegated tasks
# @param delegate_description [String] description used in the tool description
# @param delegate_chat [Object] chat object whose #ask is called with the task
# @param agent_name [Symbol, String] name of the agent that owns this tool
# @param swarm [Object] swarm passed through to hook contexts
# @param hook_registry [Object] registry handed to the hook executor
def initialize(delegate_name:, delegate_description:, delegate_chat:, agent_name:, swarm:, hook_registry:)
  super()

  target = delegate_name.to_s

  # Who is delegating, and through which shared objects
  @agent_name = agent_name
  @swarm = swarm
  @hook_registry = hook_registry

  # Who receives the work
  @delegate_name = delegate_name
  @delegate_description = delegate_description
  @delegate_chat = delegate_chat
  @delegate_target = target

  # Tool name follows the DelegateTaskTo[AgentName] format
  @tool_name = "DelegateTaskTo#{target.capitalize}"
end
42
+
43
+ # Build description dynamically based on delegate
44
+ description do
45
+ "Delegate tasks to #{@delegate_name}. #{@delegate_description}"
46
+ end
47
+
48
+ param :task,
49
+ type: "string",
50
+ desc: "Task description for the agent",
51
+ required: true
52
+
53
+ # Override name to return custom delegation tool name
54
# Report the per-delegate tool name instead of a class-derived one.
#
# @return [String] the name stored in @tool_name at construction time
def name
  custom_name = @tool_name
  custom_name
end
57
+
58
+ # Execute delegation with pre/post hooks
59
+ #
60
+ # @param task [String] Task to delegate
61
+ # @return [String] Result from delegate agent or error message
62
# Hand a task to the delegate agent, running pre/post delegation hooks around it.
#
# A pre_delegation hook may halt or replace the delegation, in which case the
# delegate is never asked. A post_delegation hook may replace the result.
# Any StandardError is logged as a "delegation_error" event and turned into
# an error string rather than being raised.
#
# @param task [String] task to delegate
# @return [String] result from the delegate agent, a hook-supplied value, or an error message
def execute(task:)
  pre_context = Hooks::Context.new(
    event: :pre_delegation,
    agent_name: @agent_name,
    swarm: @swarm,
    delegation_target: @delegate_target,
    metadata: {
      tool_name: @tool_name,
      task: task,
      timestamp: Time.now.utc.iso8601,
    },
  )

  executor = Hooks::Executor.new(@hook_registry, logger: RubyLLM.logger)
  pre_result = executor.execute_safe(event: :pre_delegation, context: pre_context, callbacks: [])

  # A hook may stop the delegation or supply the answer itself
  return pre_result.value || "Delegation halted by callback" if pre_result.halt?
  return pre_result.value if pre_result.replace?

  # Proceed with delegation
  delegation_result = @delegate_chat.ask(task).content

  post_context = Hooks::Context.new(
    event: :post_delegation,
    agent_name: @agent_name,
    swarm: @swarm,
    delegation_target: @delegate_target,
    delegation_result: delegation_result,
    metadata: {
      tool_name: @tool_name,
      task: task,
      result: delegation_result,
      timestamp: Time.now.utc.iso8601,
    },
  )

  post_result = executor.execute_safe(event: :post_delegation, context: post_context, callbacks: [])

  # Return modified result if callback replaces it
  post_result.replace? ? post_result.value : delegation_result
rescue Faraday::TimeoutError, Net::ReadTimeout => e
  # The logged message is fixed for timeouts rather than taken from the exception
  emit_delegation_error(e, "Request timed out")
  "Error: Request to #{@tool_name} timed out. The agent may be overloaded or the LLM service is not responding. Please try again or simplify the task."
rescue Faraday::Error => e
  emit_delegation_error(e, e.message)
  "Error: Network error communicating with #{@tool_name}: #{e.class.name}. Please check connectivity and try again."
rescue StandardError => e
  frames = emit_delegation_error(e, e.message)
  # Return error string for LLM
  "Error: #{@tool_name} encountered an error: #{e.class.name}: #{e.message}\nBacktrace:\n #{frames.join("\n ")}"
end

# Emit a "delegation_error" event to the log stream.
#
# @param exception [Exception] the error being reported
# @param message [String] text recorded as error_message
# @return [Array<String>] the first five backtrace lines (empty when there is no backtrace)
private def emit_delegation_error(exception, message)
  frames = exception.backtrace&.first(5) || []
  LogStream.emit(
    type: "delegation_error",
    agent: @agent_name,
    delegate_to: @tool_name,
    error_class: exception.class.name,
    error_message: message,
    backtrace: frames,
  )
  frames
end
150
+ end
151
+ end
152
+ end
@@ -0,0 +1,83 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SwarmSDK
4
+ module Tools
5
+ module DocumentConverters
6
+ # Base class for document converters
7
+ # Provides common interface and utility methods for converting various document formats
8
# Abstract parent for document converters.
#
# Subclasses supply the class-level metadata (.gem_name, .format_name,
# .extensions) and an instance-level #convert. This class contributes the
# gem-availability check and two protected message helpers.
class BaseConverter
  # Name of the gem the converter depends on.
  # @return [String]
  # @raise [NotImplementedError] unless overridden by a subclass
  def self.gem_name
    raise NotImplementedError, "#{name} must implement .gem_name"
  end

  # Human-readable format name.
  # @return [String]
  # @raise [NotImplementedError] unless overridden by a subclass
  def self.format_name
    raise NotImplementedError, "#{name} must implement .format_name"
  end

  # File extensions handled by the converter.
  # @return [Array<String>]
  # @raise [NotImplementedError] unless overridden by a subclass
  def self.extensions
    raise NotImplementedError, "#{name} must implement .extensions"
  end

  # Whether the gem named by .gem_name is installed.
  # @return [Boolean]
  def self.available?
    required_gem = gem_name
    gem_available?(required_gem)
  end

  # Whether RubyGems can find a specification for the given gem.
  # @param gem_name [String] Name of the gem to check
  # @return [Boolean] false when the lookup raises Gem::LoadError
  def self.gem_available?(gem_name)
    begin
      Gem::Specification.find_by_name(gem_name)
    rescue Gem::LoadError
      return false
    end

    true
  end

  # Convert a document file to text/content.
  # @param file_path [String] Path to the file
  # @return [String, RubyLLM::Content] Converted content or error message
  # @raise [NotImplementedError] unless overridden by a subclass
  def convert(file_path)
    raise NotImplementedError, "#{self.class.name} must implement #convert"
  end

  protected

  # Build the reminder shown when the gem for a format is not installed.
  # @param format [String] Format name (e.g., "PDF")
  # @param gem_name [String] Required gem name
  # @return [String]
  def unsupported_format_reminder(format, gem_name)
    <<~REMINDER
      <system-reminder>
      This file is a #{format} document, but the required gem is not installed.

      To enable #{format} file reading, please install the gem:
      gem install #{gem_name}

      Or add to your Gemfile:
      gem "#{gem_name}"

      Don't install the gem yourself. Ask the user if they would like you to install this gem.
      </system-reminder>
    REMINDER
  end

  # Prefix a message with the error marker.
  # @param message [String] Error message
  # @return [String]
  def error(message)
    "Error: " + message.to_s
  end
end
81
+ end
82
+ end
83
+ end
@@ -0,0 +1,99 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SwarmSDK
4
+ module Tools
5
+ module DocumentConverters
6
+ # Converts DOCX documents to text with image extraction
7
+ class DocxConverter < BaseConverter
8
# Gem that provides DOCX parsing.
# @return [String]
def self.gem_name
  "docx"
end

# Human-readable format name.
# @return [String]
def self.format_name
  "DOCX"
end

# Extensions routed to this converter. ".doc" is listed as well so that
# #convert can answer it with an explicit "not supported" message.
# @return [Array<String>]
def self.extensions
  [".docx", ".doc"]
end
21
+
22
+ # Convert a DOCX document to text/content
23
+ # @param file_path [String] Path to the DOCX file
24
+ # @return [String, RubyLLM::Content] Converted content or error message
25
# Convert a DOCX document to text/content
#
# @param file_path [String] Path to the DOCX file
# @return [String, RubyLLM::Content] plain text when the document has no
#   images, a Content object with image attachments otherwise, or an
#   error/reminder string when the file cannot be converted
def convert(file_path)
  # Check for legacy DOC format first: installing the docx gem cannot make
  # a .doc file readable, so the "install the gem" reminder would mislead.
  if File.extname(file_path).downcase == ".doc"
    return error("DOC format is not supported. Please convert to DOCX first.")
  end

  unless self.class.available?
    return unsupported_format_reminder(self.class.format_name, self.class.gem_name)
  end

  begin
    require "docx"
    require "tmpdir"

    doc = Docx::Document.open(file_path)

    # Extract images from the DOCX
    image_paths = ImageExtractors::DocxImageExtractor.extract_images(doc, file_path)

    output = []
    output << "Document: #{File.basename(file_path)}"
    output << "=" * 60
    output << ""

    # Extract paragraphs
    paragraphs = doc.paragraphs.map(&:text).reject(&:empty?)

    # Check for empty document
    if paragraphs.empty? && doc.tables.empty?
      output << "(Document is empty - no paragraphs or tables)"
    else
      output.concat(paragraphs)

      # Extract tables with enhanced formatting
      if doc.tables.any?
        output << ""
        output << "Tables:"
        output << "-" * 60

        doc.tables.each_with_index do |table, idx|
          output << ""
          output << "Table #{idx + 1} (#{table.row_count} rows × #{table.column_count} columns):"

          table.rows.each do |row|
            output << row.cells.map(&:text).join(" | ")
          end
        end
      end
    end

    text_content = output.join("\n")

    # If there are images, return Content with attachments
    if image_paths.any?
      content = RubyLLM::Content.new(text_content)
      image_paths.each do |image_path|
        content.add_attachment(image_path)
      end
      content
    else
      # No images, return just text
      text_content
    end
  rescue Zip::Error => e
    error("Invalid or corrupted DOCX file: #{e.message}")
  rescue Errno::ENOENT => e
    error("File not found or missing document.xml: #{e.message}")
  rescue StandardError => e
    error("Failed to parse DOCX file: #{e.message}")
  end
end
96
+ end
97
+ end
98
+ end
99
+ end
@@ -0,0 +1,78 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SwarmSDK
4
+ module Tools
5
+ module DocumentConverters
6
+ # Converts PDF documents to text with image extraction
7
+ class PdfConverter < BaseConverter
8
# Gem that provides PDF parsing.
# @return [String]
def self.gem_name
  "pdf-reader"
end

# Human-readable format name.
# @return [String]
def self.format_name
  "PDF"
end

# Extensions routed to this converter.
# @return [Array<String>]
def self.extensions
  [".pdf"]
end
21
+
22
+ # Convert a PDF document to text/content
23
+ # @param file_path [String] Path to the PDF file
24
+ # @return [String, RubyLLM::Content] Converted content or error message
25
# Convert a PDF document to text/content
#
# @param file_path [String] Path to the PDF file
# @return [String, RubyLLM::Content] plain text when no images were
#   extracted, a Content object with image attachments otherwise, or an
#   error/reminder string when the file cannot be converted
def convert(file_path)
  converter = self.class
  return unsupported_format_reminder(converter.format_name, converter.gem_name) unless converter.available?

  begin
    require "pdf-reader"
    require "tmpdir"
    require "fileutils"

    reader = PDF::Reader.new(file_path)

    # Document header
    lines = [
      "PDF Document: #{File.basename(file_path)}",
      "=" * 60,
      "Pages: #{reader.page_count}",
      "",
    ]

    # Extract images from the PDF
    image_paths = ImageExtractors::PdfImageExtractor.extract_images(reader, file_path)

    # One section per page, numbered from 1
    reader.pages.each.with_index(1) do |page, page_number|
      page_text = page.text.strip
      lines.push(
        "Page #{page_number}:",
        "-" * 60,
        page_text.empty? ? "(No text content on this page)" : page_text,
        "",
      )
    end

    text_content = lines.join("\n")

    # No images, return just text
    return text_content unless image_paths.any?

    # Images present: wrap the text and attach each extracted image
    content = RubyLLM::Content.new(text_content)
    image_paths.each { |image_path| content.add_attachment(image_path) }
    content
  rescue PDF::Reader::MalformedPDFError => e
    error("PDF file is malformed: #{e.message}")
  rescue PDF::Reader::UnsupportedFeatureError => e
    error("PDF contains unsupported features: #{e.message}")
  rescue StandardError => e
    error("Failed to parse PDF file: #{e.message}")
  end
end
75
+ end
76
+ end
77
+ end
78
+ end