swarm_sdk 3.0.0.alpha1 → 3.0.0.alpha3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 373c4df61931acf2a3ed260a6686068dc918f9d3ad6f3eea746f81a38711e2b5
4
- data.tar.gz: 2b5414dd35362e4e58616092717b47b2207dcf54a886c10e74f5408f07686192
3
+ metadata.gz: 569ac707625b9bcfc78080af26a7c1d48895b5057302c23f98543b9b44e6a706
4
+ data.tar.gz: ab685ff041e4106046ce4da5e4ec9b3937fd4d6290e1f5deeb7d7991f579da6b
5
5
  SHA512:
6
- metadata.gz: 46a0bba9e559d4acc92dbb5bf68776471bf2409a2aa0cee9b3ed458e0d9028b24b69534b6963a9a1b57de23ff305c9d69c7a927828f80de4de7750561311bb4f
7
- data.tar.gz: add457e2f8a04cca43f0fd9f169207695cad562a7f7c0b98a217a002560d335e693cca5ef0a39ad0438a7d0ff818246f97d43a0155d103f9d3944e73cb9e3027
6
+ metadata.gz: fd68128f22e9758ae66548a4ae2f48b210e92b2c79f4bd78c46fd47cd1d34a9d282740ee35881c27c515ac7eb8b294db6f36c8a70e762ea01ad1d52085da0b5b
7
+ data.tar.gz: d802eb7a4112874bf7b2ad414564af4fb9806074b8d8d108a471efc33943133942ee646360c7f4ce8390728d44cc031cede119fca993a79a8c6f1e6a7a6c7ee1
@@ -305,6 +305,77 @@ module SwarmSDK
305
305
  false
306
306
  end
307
307
 
308
+ # Run an iterative refinement loop over the agent
309
+ #
310
+ # Executes a kickoff prompt followed by repeated iterate prompts,
311
+ # optionally checking for convergence via embedding similarity
312
+ # between consecutive responses. Each iteration is a normal ask()
313
+ # call — hooks fire, memory ingests, and events stream normally.
314
+ #
315
+ # @param kickoff [String] Prompt for the first iteration
316
+ # @param iterate [String] Prompt for subsequent iterations
317
+ # @param max_iterations [Integer] Maximum number of iterations (>= 1)
318
+ # @param convergence_threshold [Float] Similarity threshold for convergence (0.0..1.0)
319
+ # @param converge [Boolean] Whether to check for convergence via embeddings
320
+ # @yield [event] Optional block receives ALL events (content_chunk, loop_*, etc.)
321
+ # @yieldparam event [Hash] Event hash with :type, :timestamp, and event-specific fields
322
+ # @return [Loop::Result] Aggregate result with iterations and convergence status
323
+ #
324
+ # @example Basic iterative refinement
325
+ # result = agent.loop(
326
+ # kickoff: "Write a poem about the sea",
327
+ # iterate: "Improve the poem, making it more vivid",
328
+ # max_iterations: 5,
329
+ # )
330
+ # puts result.final_response.content
331
+ # puts "Converged: #{result.converged?}"
332
+ #
333
+ # @example Without convergence checking
334
+ # result = agent.loop(
335
+ # kickoff: "Draft an outline",
336
+ # iterate: "Expand the next section",
337
+ # max_iterations: 3,
338
+ # converge: false,
339
+ # )
340
+ #
341
+ # @example With event streaming
342
+ # agent.loop(kickoff: "Start", iterate: "Continue", max_iterations: 5) do |event|
343
+ # case event[:type]
344
+ # when "loop_iteration_completed"
345
+ # puts "Iteration #{event[:iteration]}, delta: #{event[:delta_score]}"
346
+ # when "content_chunk"
347
+ # print event[:content]
348
+ # end
349
+ # end
350
+ #
351
+ # @raise [ArgumentError] If max_iterations < 1 or convergence_threshold out of range
352
def loop(kickoff:, iterate:, max_iterations: 10, convergence_threshold: 0.95, converge: true, &block)
  # NOTE(review): an instance method named `loop` takes precedence over
  # Kernel#loop for this object, so a bare `loop do ... end` written inside
  # this class would dispatch here — confirm none exist.

  # Reject invalid limits before resolving an embedder or sending any prompt.
  validate_loop_params!(max_iterations, convergence_threshold)

  # An embedder is only resolved when convergence checking was requested;
  # otherwise the executor receives nil.
  similarity_embedder = loop_embedder if converge

  runner = Loop::Executor.new(
    ask_callable: lambda { |prompt| ask(prompt, &block) },
    embedder: similarity_embedder,
    agent_id: @id,
  )

  run_options = {
    kickoff: kickoff,
    iterate: iterate,
    max_iterations: max_iterations,
    convergence_threshold: convergence_threshold,
    converge: converge,
  }

  # The executor runs inside the block emitter so that the events it emits
  # itself (loop_started, loop_iteration_completed, loop_completed) are
  # delivered to the caller's block as well.
  with_block_emitter(block) { runner.run(**run_options) }
end
378
+
308
379
  # Run memory defragmentation (compression, consolidation, promotion, pruning)
309
380
  #
310
381
  # Call this between sessions, on a schedule, or whenever appropriate.
@@ -1160,6 +1231,33 @@ module SwarmSDK
1160
1231
  @mcp_connectors.each(&:disconnect!)
1161
1232
  @mcp_connectors.clear
1162
1233
  end
1234
+
1235
+ # Validate loop parameters
1236
+ #
1237
+ # @param max_iterations [Integer] Must be >= 1
1238
+ # @param convergence_threshold [Float] Must be between 0.0 and 1.0
1239
+ # @raise [ArgumentError] If parameters are invalid
1240
+ # @return [void]
1241
def validate_loop_params!(max_iterations, convergence_threshold)
  unless max_iterations.is_a?(Integer) && max_iterations >= 1
    raise ArgumentError, "max_iterations must be an integer >= 1, got #{max_iterations.inspect}"
  end

  # `real?` screens out Complex values: they are Numeric but define no
  # ordering operators, so the range comparison below would otherwise fail
  # with NoMethodError instead of the documented ArgumentError.
  threshold_valid = convergence_threshold.is_a?(Numeric) &&
    convergence_threshold.real? &&
    convergence_threshold >= 0.0 &&
    convergence_threshold <= 1.0
  return if threshold_valid

  raise ArgumentError,
    "convergence_threshold must be between 0.0 and 1.0, got #{convergence_threshold.inspect}"
end
1251
+
1252
+ # Resolve or create an embedder for loop convergence detection
1253
+ #
1254
+ # Reuses the memory store's embedder if available (ONNX model
1255
+ # already loaded), otherwise creates a standalone instance.
1256
+ #
1257
+ # @return [Memory::Embedder]
1258
def loop_embedder
  # Prefer the embedder exposed by the memory store when a store is present
  # and returns one; otherwise build a standalone embedder.
  shared_embedder = @memory_store&.embedder
  return shared_embedder if shared_embedder

  Memory::Embedder.new
end
1163
1261
  end
1164
1262
  end
1165
1263
  end
@@ -0,0 +1,147 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SwarmSDK
4
+ module V3
5
+ module Loop
6
+ # Core loop engine for iterative refinement
7
+ #
8
+ # Runs a sequence of ask() calls through a provided callable,
9
+ # optionally checking for convergence via embedding similarity
10
+ # between consecutive responses. Emits events for each iteration
11
+ # and the overall loop lifecycle.
12
+ #
13
+ # The Executor is stateless between runs — all configuration is
14
+ # passed to {#run}. The ask callable and embedder are injected
15
+ # at construction for testability.
16
+ #
17
+ # @example Basic usage (called internally by Agent#loop)
18
+ # executor = Executor.new(
19
+ # ask_callable: ->(prompt) { agent.ask(prompt) },
20
+ # embedder: Memory::Embedder.new,
21
+ # agent_id: "writer_abc123",
22
+ # )
23
+ # result = executor.run(
24
+ # kickoff: "Write a poem about the sea",
25
+ # iterate: "Improve the poem",
26
+ # max_iterations: 5,
27
+ # convergence_threshold: 0.95,
28
+ # converge: true,
29
+ # )
30
class Executor
  include Memory::Adapters::VectorUtils

  # Create a new Executor
  #
  # @param ask_callable [#call] Callable invoked with one prompt per iteration;
  #   a nil return value ends the loop early
  # @param embedder [#embed_batch, nil] Embedder used for convergence detection
  #   (when nil, no delta score is ever computed)
  # @param agent_id [String] Agent identifier attached to every emitted event
  def initialize(ask_callable:, embedder:, agent_id:)
    @ask_callable = ask_callable
    @embedder = embedder
    @agent_id = agent_id
  end

  # Execute the iterative loop
  #
  # Sends the kickoff prompt on the first iteration and the iterate prompt
  # on every later one. When convergence checking is enabled and an embedder
  # is present, each response is compared with the previous one and the loop
  # stops as soon as the similarity reaches the threshold. Emits
  # "loop_started" once, "loop_iteration_completed" after every recorded
  # iteration, and "loop_completed" once at the end.
  #
  # @param kickoff [String] Prompt for the first iteration
  # @param iterate [String] Prompt for subsequent iterations
  # @param max_iterations [Integer] Maximum number of iterations (>= 1)
  # @param convergence_threshold [Float] Similarity threshold for convergence (0.0..1.0)
  # @param converge [Boolean] Whether to check for convergence
  # @return [Result] Aggregate result with all recorded iterations
  #
  # @example Run with convergence
  #   result = executor.run(
  #     kickoff: "Draft an essay",
  #     iterate: "Revise and improve",
  #     max_iterations: 10,
  #     convergence_threshold: 0.95,
  #     converge: true,
  #   )
  #   result.converged? #=> true or false
  def run(kickoff:, iterate:, max_iterations:, convergence_threshold:, converge:)
    EventStream.emit(
      type: "loop_started",
      agent: @agent_id,
      max_iterations: max_iterations,
      convergence_threshold: convergence_threshold,
    )

    iterations = []
    converged = false
    previous_content = nil

    max_iterations.times do |index|
      prompt = index.zero? ? kickoff : iterate
      response = @ask_callable.call(prompt)

      # Handle interrupted agent (ask() returns nil): stop without recording
      # an iteration for the missing response.
      break if response.nil?

      current_content = response.content.to_s

      # The first iteration has nothing to compare against, so its delta stays nil.
      delta_score = nil
      if converge && previous_content && @embedder
        delta_score = compute_delta(previous_content, current_content)
      end

      # Decide convergence BEFORE emitting the iteration event so the event
      # for the converging iteration reports converged: true.
      converged = !delta_score.nil? && delta_score >= convergence_threshold

      iterations << Iteration.new(
        number: index + 1,
        response: response,
        prompt: prompt,
        tokens: token_usage(response),
        delta_score: delta_score,
      )

      EventStream.emit(
        type: "loop_iteration_completed",
        agent: @agent_id,
        iteration: index + 1,
        delta_score: delta_score,
        converged: converged,
      )

      break if converged

      previous_content = current_content
    end

    EventStream.emit(
      type: "loop_completed",
      agent: @agent_id,
      iterations: iterations.size,
      converged: converged,
    )

    Result.new(iterations: iterations, converged: converged)
  end

  private

  # Collect token usage from a response, treating missing readers and nil
  # values as zero.
  #
  # @param response [Object] Response returned by the ask callable
  # @return [Hash{Symbol => Integer}] { input:, output: }
  def token_usage(response)
    {
      input: token_count(response, :input_tokens),
      output: token_count(response, :output_tokens),
    }
  end

  # Read one token counter from the response.
  #
  # @param response [Object] Response returned by the ask callable
  # @param reader [Symbol] Name of the counter method
  # @return [Integer] Counter value, or 0 when unavailable
  def token_count(response, reader)
    return 0 unless response.respond_to?(reader)

    response.public_send(reader) || 0
  end

  # Compute the similarity between two response texts
  #
  # Both texts are embedded with a single embed_batch call; the two
  # resulting vectors are passed to `similarity` (not defined in this class —
  # presumably supplied by the included VectorUtils mixin).
  #
  # @param previous_content [String] Previous iteration's response text
  # @param current_content [String] Current iteration's response text
  # @return [Float] Similarity score compared against the convergence threshold
  def compute_delta(previous_content, current_content)
    previous_vector, current_vector = @embedder.embed_batch([previous_content, current_content])
    similarity(previous_vector, current_vector)
  end
end
145
+ end
146
+ end
147
+ end
@@ -0,0 +1,63 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SwarmSDK
4
+ module V3
5
+ module Loop
6
+ # Immutable record of a single loop iteration
7
+ #
8
+ # Captures the prompt sent, the LLM response received, token usage,
9
+ # and the embedding-similarity delta from the previous iteration.
10
+ # Frozen on creation to prevent accidental mutation.
11
+ #
12
+ # @example First iteration (no delta)
13
+ # iteration = Iteration.new(
14
+ # number: 1,
15
+ # response: response,
16
+ # prompt: "Write a poem",
17
+ # tokens: { input: 10, output: 20 },
18
+ # delta_score: nil,
19
+ # )
20
+ #
21
+ # @example Subsequent iteration with convergence score
22
+ # iteration = Iteration.new(
23
+ # number: 2,
24
+ # response: response,
25
+ # prompt: "Improve the poem",
26
+ # tokens: { input: 15, output: 25 },
27
+ # delta_score: 0.87,
28
+ # )
29
class Iteration
  # @return [Integer] 1-indexed iteration number
  attr_reader :number

  # @return [RubyLLM::Message] LLM response from ask()
  attr_reader :response

  # @return [String] Prompt used for this iteration (kickoff or iterate)
  attr_reader :prompt

  # @return [Hash{Symbol => Integer}] Token usage { input:, output: } (frozen copy)
  attr_reader :tokens

  # @return [Float, nil] Similarity to the previous iteration (nil when none was computed)
  attr_reader :delta_score

  # Create a new Iteration record
  #
  # The record itself and its token hash are frozen; the response and
  # prompt objects are stored as given and are not frozen here.
  #
  # @param number [Integer] 1-indexed iteration number
  # @param response [RubyLLM::Message] LLM response
  # @param prompt [String] Prompt used
  # @param tokens [Hash{Symbol => Integer}] Token usage { input:, output: }
  # @param delta_score [Float, nil] Similarity to previous iteration
  def initialize(number:, response:, prompt:, tokens:, delta_score:)
    @number = number
    @response = response
    @prompt = prompt
    # Freeze a private copy so the caller's hash is not frozen as a side effect.
    @tokens = tokens.dup.freeze
    @delta_score = delta_score
    freeze
  end
end
61
+ end
62
+ end
63
+ end
@@ -0,0 +1,74 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SwarmSDK
4
+ module V3
5
+ module Loop
6
+ # Aggregate result of a completed loop execution
7
+ #
8
+ # Contains all iterations and whether the loop converged.
9
+ # Frozen on creation — both the Result and its iterations array
10
+ # are immutable after construction.
11
+ #
12
+ # @example Converged loop
13
+ # result = executor.run(kickoff: "Write a poem", iterate: "Improve it", ...)
14
+ # result.converged? #=> true
15
+ # result.iteration_count #=> 3
16
+ # result.final_response #=> RubyLLM::Message
17
+ # result.total_tokens #=> { input: 45, output: 60 }
18
+ #
19
+ # @example Non-converged loop (hit max_iterations)
20
+ # result.converged? #=> false
21
+ # result.iteration_count #=> 10
22
class Result
  # @return [Array<Iteration>] All iterations (frozen copy of the input array)
  attr_reader :iterations

  # @return [Boolean] Whether the loop stopped because similarity between
  #   consecutive responses reached the convergence threshold
  attr_reader :converged
  alias_method :converged?, :converged

  # Create a new Result
  #
  # @param iterations [Array<Iteration>] Completed iterations
  # @param converged [Boolean] Whether convergence was detected
  def initialize(iterations:, converged:)
    # Freeze a private copy so the caller's array is not frozen as a side effect.
    @iterations = iterations.dup.freeze
    @converged = converged
    freeze
  end

  # The last iteration's LLM response
  #
  # @return [RubyLLM::Message, nil] Final response, or nil if no iterations
  #
  # @example
  #   puts result.final_response.content
  def final_response
    @iterations.last&.response
  end

  # Number of iterations executed
  #
  # @return [Integer]
  #
  # @example
  #   result.iteration_count #=> 3
  def iteration_count
    @iterations.size
  end

  # Aggregate token usage across all iterations
  #
  # @return [Hash{Symbol => Integer}] { input:, output: } (both 0 when there are no iterations)
  #
  # @example
  #   result.total_tokens #=> { input: 45, output: 60 }
  def total_tokens
    {
      input: @iterations.sum { |iteration| iteration.tokens[:input] },
      output: @iterations.sum { |iteration| iteration.tokens[:output] },
    }
  end
end
72
+ end
73
+ end
74
+ end
@@ -0,0 +1,84 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SwarmSDK
4
+ module V3
5
+ module Tools
6
+ module DocumentConverters
7
+ # Abstract base class for document converters
8
+ #
9
+ # Provides common interface and helpers for converting documents to text.
10
+ # Each converter checks gem availability and provides clear error messages.
11
class Base
  # Name of the gem this converter depends on.
  #
  # @return [String] the gem name
  # @raise [NotImplementedError] unless overridden by the subclass
  def self.gem_name
    raise NotImplementedError, "#{name} must implement .gem_name"
  end

  # Human-readable name of the handled format.
  #
  # @return [String] the format name (e.g., "PDF", "DOCX")
  # @raise [NotImplementedError] unless overridden by the subclass
  def self.format_name
    raise NotImplementedError, "#{name} must implement .format_name"
  end

  # File extensions handled by this converter.
  #
  # @return [Array<String>] extensions including the dot (e.g., [".pdf"])
  # @raise [NotImplementedError] unless overridden by the subclass
  def self.extensions
    raise NotImplementedError, "#{name} must implement .extensions"
  end

  # Whether RubyGems can locate the gem named by .gem_name.
  #
  # @return [Boolean] true if a matching gem specification is found
  def self.available?
    # find_by_name raises when the gem is missing, so reaching the nil
    # check at all means a specification was found.
    !Gem::Specification.find_by_name(gem_name).nil?
  rescue Gem::MissingSpecError
    false
  end

  # Convert a document to text (possibly with image attachments).
  #
  # @param file_path [String] absolute path to document
  # @return [String, RubyLLM::Content] text or text with image attachments
  # @raise [NotImplementedError] unless overridden by the subclass
  def convert(file_path)
    raise NotImplementedError, "#{self.class.name} must implement #convert"
  end

  protected

  # Build the system reminder shown when the required gem is missing.
  #
  # @return [String] formatted system reminder message
  def unsupported_format_message
    format_label = self.class.format_name
    required_gem = self.class.gem_name

    <<~MSG.strip
      <system-reminder>
      This is a #{format_label} document, but the required gem is not installed.

      To enable #{format_label} reading:
      gem install #{required_gem}
      </system-reminder>
    MSG
  end

  # Prefix a description with the "Error: " marker.
  #
  # @param message [String] error description
  # @return [String] formatted error message
  def error(message)
    "Error: #{message}"
  end
end
81
+ end
82
+ end
83
+ end
84
+ end
@@ -0,0 +1,120 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SwarmSDK
4
+ module V3
5
+ module Tools
6
+ module DocumentConverters
7
+ # DOCX document converter
8
+ #
9
+ # Converts DOCX files to text and extracts images.
10
+ # Requires the docx gem (which includes rubyzip).
11
class DocxConverter < Base
  # Image types copied out of the archive; everything else under
  # word/media/ is skipped.
  IMAGE_EXTENSIONS = [".png", ".jpg", ".jpeg", ".gif"].freeze
  private_constant :IMAGE_EXTENSIONS

  class << self
    # @return [String] gem name
    def gem_name
      "docx"
    end

    # @return [String] format name
    def format_name
      "DOCX"
    end

    # @return [Array<String>] supported extensions
    def extensions
      [".docx"]
    end
  end

  # Convert DOCX to text with optional image attachments
  #
  # Never raises StandardError: failures are reported as an "Error: ..."
  # string, and a missing gem as a system-reminder string.
  #
  # @param file_path [String] path to DOCX file
  # @return [String, RubyLLM::Content] text or content with images
  def convert(file_path)
    return unsupported_format_message unless self.class.available?
    # Compare the extension case-insensitively so "REPORT.DOC" is rejected too.
    return error("Legacy .doc format not supported") if File.extname(file_path).casecmp?(".doc")

    require "docx"
    doc = Docx::Document.open(file_path)

    # Extract text content
    output = build_text_output(doc, file_path)

    # Extract images (inline - no separate class)
    image_paths = extract_images(file_path)
    return output if image_paths.empty?

    content = RubyLLM::Content.new(output)
    image_paths.each { |path| content.add_attachment(path) }
    content
  rescue StandardError => e
    error("DOCX conversion failed: #{e.message}")
  end

  private

  # Build text output from DOCX document
  #
  # Emits a header, then every non-blank paragraph, then each table as
  # " | "-joined rows.
  #
  # @param doc [Docx::Document] opened document
  # @param file_path [String] original file path
  # @return [String] formatted text output
  def build_text_output(doc, file_path)
    output = []
    output << "DOCX: #{File.basename(file_path)}"
    output << "=" * 60
    output << ""

    # Paragraphs
    doc.paragraphs.each do |para|
      text = para.text.strip
      output << text unless text.empty?
    end

    # Tables
    doc.tables.each_with_index do |table, idx|
      output << ""
      output << "Table #{idx + 1}:"
      output << "-" * 40
      table.rows.each do |row|
        output << row.cells.map(&:text).join(" | ")
      end
    end

    output.join("\n")
  end

  # Extract images from DOCX ZIP (word/media/)
  #
  # Best effort: any StandardError yields an empty list. The directory
  # holding extracted images is left in place because the returned paths
  # are handed to the caller; nothing in this class deletes it afterwards.
  # It is removed only when no image is returned.
  #
  # @param docx_path [String] path to DOCX file
  # @return [Array<String>] paths to extracted image files
  def extract_images(docx_path)
    require "zip"
    # Dir.mktmpdir is provided by the tmpdir stdlib, not by core Dir. Without
    # this require the call raises NoMethodError, which the rescue below
    # would swallow — silently dropping every image.
    require "tmpdir"
    require "fileutils"

    images = []
    temp_dir = Dir.mktmpdir("docx_#{Process.pid}")

    Zip::File.open(docx_path) do |zip|
      zip.each do |entry|
        next unless entry.name.start_with?("word/media/")
        next unless IMAGE_EXTENSIONS.include?(File.extname(entry.name).downcase)

        # basename keeps the target inside temp_dir whatever the entry name is
        path = File.join(temp_dir, File.basename(entry.name))
        entry.extract(path)
        images << path
      end
    end

    # Nothing extracted: do not leave an empty directory behind.
    FileUtils.rm_rf(temp_dir) if images.empty?
    images
  rescue StandardError
    # Extraction failures are deliberately non-fatal; drop any partial
    # output since no paths are returned for it.
    FileUtils.rm_rf(temp_dir) if temp_dir
    []
  end
end
117
+ end
118
+ end
119
+ end
120
+ end
@@ -0,0 +1,111 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SwarmSDK
4
+ module V3
5
+ module Tools
6
+ module DocumentConverters
7
+ # PDF document converter
8
+ #
9
+ # Converts PDF files to text and extracts JPEG images.
10
+ # Requires the pdf-reader gem.
11
class PdfConverter < Base
  class << self
    # @return [String] gem name
    def gem_name
      "pdf-reader"
    end

    # @return [String] format name
    def format_name
      "PDF"
    end

    # @return [Array<String>] supported extensions
    def extensions
      [".pdf"]
    end
  end

  # Convert PDF to text with optional image attachments
  #
  # Never raises StandardError: failures are reported as an "Error: ..."
  # string, and a missing gem as a system-reminder string.
  #
  # @param file_path [String] path to PDF file
  # @return [String, RubyLLM::Content] text or content with images
  def convert(file_path)
    return unsupported_format_message unless self.class.available?

    require "pdf-reader"
    reader = PDF::Reader.new(file_path)

    # Extract text from all pages
    output = build_text_output(reader, file_path)

    # Extract JPEG images (inline - no separate class)
    image_paths = extract_jpeg_images(reader)
    return output if image_paths.empty?

    content = RubyLLM::Content.new(output)
    image_paths.each { |path| content.add_attachment(path) }
    content
  rescue PDF::Reader::MalformedPDFError => e
    error("Malformed PDF: #{e.message}")
  rescue StandardError => e
    error("PDF conversion failed: #{e.message}")
  end

  private

  # Build text output from PDF pages
  #
  # Emits a header with the page count, then one section per page;
  # pages without text are marked "(No text)".
  #
  # @param reader [PDF::Reader] initialized reader
  # @param file_path [String] original file path
  # @return [String] formatted text output
  def build_text_output(reader, file_path)
    output = []
    output << "PDF: #{File.basename(file_path)}"
    output << "=" * 60
    output << "Pages: #{reader.page_count}"
    output << ""

    reader.pages.each_with_index do |page, idx|
      output << "Page #{idx + 1}:"
      output << "-" * 60
      text = page.text.strip
      output << (text.empty? ? "(No text)" : text)
      output << ""
    end

    output.join("\n")
  end

  # Extract JPEG images only (LLM API compatible)
  #
  # Best effort: any StandardError yields an empty list. The directory
  # holding extracted images is left in place because the returned paths
  # are handed to the caller; nothing in this class deletes it afterwards.
  # It is removed only when no image is returned.
  #
  # @param reader [PDF::Reader] initialized reader
  # @return [Array<String>] paths to extracted JPEG files
  def extract_jpeg_images(reader)
    # Dir.mktmpdir is provided by the tmpdir stdlib, not by core Dir. Without
    # this require the call raises NoMethodError, which the rescue below
    # would swallow — silently dropping every image.
    require "tmpdir"
    require "fileutils"

    images = []
    temp_dir = Dir.mktmpdir("pdf_#{Process.pid}")

    reader.pages.each_with_index do |page, page_num|
      page.xobjects.each do |name, stream|
        next unless jpeg_image?(stream)

        # The XObject name comes from the (untrusted) PDF; strip anything
        # that could act as a path separator before using it in a file name.
        safe_name = name.to_s.gsub(/[^\w.-]/, "_")
        path = File.join(temp_dir, "p#{page_num + 1}_#{safe_name}.jpg")
        File.binwrite(path, stream.data)
        images << path
      end
    end

    # Nothing extracted: do not leave an empty directory behind.
    FileUtils.rm_rf(temp_dir) if images.empty?
    images
  rescue StandardError
    # Extraction failures are deliberately non-fatal; drop any partial
    # output since no paths are returned for it.
    FileUtils.rm_rf(temp_dir) if temp_dir
    []
  end

  # Whether an XObject stream is an image stored with the JPEG filter alone.
  #
  # /Filter may be written as a single name or as an array of names, so both
  # :DCTDecode and [:DCTDecode] are accepted. Multi-filter chains are
  # rejected because the stream data is written out as-is, without decoding.
  #
  # @param stream [PDF::Reader::Stream] XObject stream
  # @return [Boolean]
  def jpeg_image?(stream)
    stream.hash[:Subtype] == :Image && Array(stream.hash[:Filter]) == [:DCTDecode]
  end
end
108
+ end
109
+ end
110
+ end
111
+ end
@@ -0,0 +1,128 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SwarmSDK
4
+ module V3
5
+ module Tools
6
+ module DocumentConverters
7
+ # XLSX/Spreadsheet converter
8
+ #
9
+ # Converts spreadsheet files (XLSX, XLS, ODS) to CSV format.
10
+ # Requires the roo gem (and roo-xls for legacy XLS support).
11
class XlsxConverter < Base
  class << self
    # @return [String] gem name
    def gem_name
      "roo"
    end

    # @return [String] format name
    def format_name
      "XLSX/Spreadsheet"
    end

    # @return [Array<String>] supported extensions
    def extensions
      [".xlsx", ".xls", ".ods"]
    end
  end

  # Convert spreadsheet to CSV text format
  #
  # Never raises StandardError: failures are reported as an "Error: ..."
  # string, and missing gems as system-reminder strings.
  #
  # @param file_path [String] path to spreadsheet file
  # @return [String] CSV formatted text
  def convert(file_path)
    return unsupported_format_message unless self.class.available?
    # Case-insensitive so "BOOK.XLS" gets the same roo-xls hint as "book.xls".
    return unsupported_xls_message if File.extname(file_path).casecmp?(".xls") && !xls_available?

    require "roo"
    require "csv"

    spreadsheet = Roo::Spreadsheet.open(file_path)
    build_csv_output(spreadsheet, file_path)
  rescue StandardError => e
    error("Spreadsheet conversion failed: #{e.message}")
  end

  private

  # Check if roo-xls gem is available for legacy XLS support
  #
  # @return [Boolean] true if roo-xls is installed
  def xls_available?
    Gem::Specification.find_by_name("roo-xls")
    true
  rescue Gem::MissingSpecError
    false
  end

  # Return system reminder for missing roo-xls gem
  #
  # @return [String] formatted system reminder
  def unsupported_xls_message
    <<~MSG.strip
      <system-reminder>
      Legacy XLS format requires additional gem.

      To enable XLS support:
      gem install roo-xls

      Or save as .xlsx format.
      </system-reminder>
    MSG
  end

  # Build CSV output from spreadsheet
  #
  # Emits a file header, then for every sheet a "Sheet: ..." header followed
  # by one CSV line per row.
  #
  # @param spreadsheet [Roo::Base] opened spreadsheet
  # @param file_path [String] original file path
  # @return [String] formatted CSV output
  def build_csv_output(spreadsheet, file_path)
    output = []
    output << "Spreadsheet: #{File.basename(file_path)}"
    output << "=" * 60
    output << ""

    spreadsheet.sheets.each do |sheet_name|
      spreadsheet.default_sheet = sheet_name
      rows = spreadsheet.last_row || 0
      cols = spreadsheet.last_column || 0

      output << "Sheet: #{sheet_name} (#{rows} rows × #{cols} cols)"
      output << "-" * 60

      append_sheet_rows(spreadsheet, output)

      output << ""
    end

    output.join("\n")
  end

  # Append the current default sheet's rows to the output as CSV lines.
  #
  # Row streaming is used when the opened spreadsheet offers it.
  # NOTE(review): in roo this is believed to be XLSX-only — confirm; the
  # fallback below exists so .xls/.ods files are still converted.
  #
  # @param spreadsheet [Roo::Base] opened spreadsheet with default_sheet set
  # @param output [Array<String>] accumulator the CSV lines are appended to
  # @return [void]
  def append_sheet_rows(spreadsheet, output)
    if spreadsheet.respond_to?(:each_row_streaming)
      # pad_cells keeps empty cells as nil placeholders so values stay in
      # their original columns instead of shifting left.
      spreadsheet.each_row_streaming(pad_cells: true) do |row|
        output << CSV.generate_line(row.map { |cell| format_cell(cell) }).chomp
      end
      return
    end

    first = spreadsheet.first_row
    last = spreadsheet.last_row
    return unless first && last # empty sheet

    (first..last).each do |index|
      values = spreadsheet.row(index).map { |value| format_value(value) }
      output << CSV.generate_line(values).chomp
    end
  end

  # Format a streamed cell based on its declared type
  #
  # @param cell [#type, #value, nil] cell to format
  # @return [String] formatted cell value ("" for nil cells or values)
  def format_cell(cell)
    return "" if cell.nil? || cell.value.nil?

    value = cell.value
    case cell.type
    when :date then format_temporal(value, "%Y-%m-%d")
    when :datetime then format_temporal(value, "%Y-%m-%d %H:%M:%S")
    when :time then format_temporal(value, "%H:%M:%S")
    when :boolean then value ? "TRUE" : "FALSE"
    when :percentage then value.is_a?(Numeric) ? "#{(value * 100).round(2)}%" : value.to_s
    else value.to_s # strings, numbers, and formulas (calculated value)
    end
  end

  # Format a raw cell value (fallback path without typed cell objects)
  #
  # @param value [Object, nil] raw value returned for a cell
  # @return [String] formatted value
  def format_value(value)
    case value
    when nil then ""
    when true then "TRUE"
    when false then "FALSE"
    else
      if value.respond_to?(:strftime)
        # Values that expose an hour carry a time of day; plain dates do not.
        value.strftime(value.respond_to?(:hour) ? "%Y-%m-%d %H:%M:%S" : "%Y-%m-%d")
      else
        value.to_s
      end
    end
  end

  # Apply a strftime pattern when the value supports it.
  #
  # Guards against date/time-typed cells whose value is not a temporal
  # object (for example a plain number), which would otherwise abort the
  # whole conversion with NoMethodError.
  #
  # @param value [Object] cell value
  # @param pattern [String] strftime pattern
  # @return [String]
  def format_temporal(value, pattern)
    value.respond_to?(:strftime) ? value.strftime(pattern) : value.to_s
  end
end
125
+ end
126
+ end
127
+ end
128
+ end
@@ -7,7 +7,15 @@ module SwarmSDK
7
7
  #
8
8
  # Supports reading entire files or specific line ranges with line numbers.
9
9
  # Tracks reads per agent for enforcing read-before-write/edit rules.
10
+ # Supports document formats (PDF, DOCX, XLSX) if gems installed.
10
11
  class Read < Base
12
+ # Document converters (optional gems)
13
+ CONVERTERS = [
14
+ DocumentConverters::PdfConverter,
15
+ DocumentConverters::DocxConverter,
16
+ DocumentConverters::XlsxConverter,
17
+ ].freeze
18
+
11
19
  class << self
12
20
  # @return [Array<Symbol>] Constructor requirements
13
21
  def creation_requirements
@@ -19,6 +27,7 @@ module SwarmSDK
19
27
  Reads a file from the local filesystem.
20
28
 
21
29
  Supports text files with line numbers. Binary files (images) are returned as visual content.
30
+ Supports document formats (PDF, DOCX, XLSX) if gems installed.
22
31
 
23
32
  Path handling:
24
33
  - Relative paths resolve against your working directory
@@ -32,12 +41,12 @@ module SwarmSDK
32
41
 
33
42
  param :offset,
34
43
  type: "integer",
35
- desc: "Line number to start reading from (1-indexed). Use for large files.",
44
+ desc: "Line number to start reading from (1-indexed). Use for large text files. Ignored for documents.",
36
45
  required: false
37
46
 
38
47
  param :limit,
39
48
  type: "integer",
40
- desc: "Number of lines to read. Use for large files.",
49
+ desc: "Number of lines to read. Use for large text files. Ignored for documents.",
41
50
  required: false
42
51
 
43
52
  # @param agent_name [Symbol, String] Agent identifier for read tracking
@@ -72,6 +81,20 @@ module SwarmSDK
72
81
  return validation_error("File does not exist: #{file_path}") unless File.exist?(resolved_path)
73
82
  return validation_error("Path is a directory. Use Bash with ls to list directories.") if File.directory?(resolved_path)
74
83
 
84
# Try document converter first
converter_class = find_converter(resolved_path)
if converter_class
  result = converter_class.new.convert(resolved_path)

  # Converters return either a String or a content object carrying image
  # attachments. Only a String can be one of the failure messages
  # ("<system-reminder>..." for a missing gem, "Error:..." for a failed
  # conversion); a non-String result is not guaranteed to respond to
  # start_with?, so the type is checked before the prefix test.
  conversion_failed = result.is_a?(String) && result.start_with?("<system-reminder>", "Error:")

  # Register read for successful conversions
  @read_tracker.register_read(@agent_name, resolved_path) unless conversion_failed

  return result
end
96
+
97
+ # Standard text file handling
75
98
  content = read_file_content(resolved_path)
76
99
 
77
100
  # Binary file — return as-is
@@ -175,6 +198,15 @@ module SwarmSDK
175
198
  "\n\n<system-reminder>This file has #{total_lines} lines but only the first #{limit} are shown. " \
176
199
  "Use offset and limit parameters to read more.</system-reminder>"
177
200
  end
201
+
202
+ # Find appropriate document converter for file extension
203
+ #
204
+ # @param file_path [String] Resolved file path
205
+ # @return [Class, nil] Converter class or nil if no match
206
def find_converter(file_path)
  # Extensions are compared in lower case, so "REPORT.PDF" matches ".pdf".
  suffix = File.extname(file_path).downcase

  # First registered converter that lists the suffix; nil when none does.
  CONVERTERS.find do |converter_class|
    converter_class.extensions.include?(suffix)
  end
end
178
210
  end
179
211
  end
180
212
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: swarm_sdk
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.0.0.alpha1
4
+ version: 3.0.0.alpha3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paulo Arruda
8
8
  bindir: bin
9
9
  cert_chain: []
10
- date: 1980-01-02 00:00:00.000000000 Z
10
+ date: 1980-01-01 00:00:00.000000000 Z
11
11
  dependencies:
12
12
  - !ruby/object:Gem::Dependency
13
13
  name: async
@@ -139,6 +139,9 @@ files:
139
139
  - lib/swarm_sdk/v3/hooks/context.rb
140
140
  - lib/swarm_sdk/v3/hooks/result.rb
141
141
  - lib/swarm_sdk/v3/hooks/runner.rb
142
+ - lib/swarm_sdk/v3/loop/executor.rb
143
+ - lib/swarm_sdk/v3/loop/iteration.rb
144
+ - lib/swarm_sdk/v3/loop/result.rb
142
145
  - lib/swarm_sdk/v3/mcp/connector.rb
143
146
  - lib/swarm_sdk/v3/mcp/mcp_error.rb
144
147
  - lib/swarm_sdk/v3/mcp/server_definition.rb
@@ -167,6 +170,10 @@ files:
167
170
  - lib/swarm_sdk/v3/tools/base.rb
168
171
  - lib/swarm_sdk/v3/tools/bash.rb
169
172
  - lib/swarm_sdk/v3/tools/clock.rb
173
+ - lib/swarm_sdk/v3/tools/document_converters/base.rb
174
+ - lib/swarm_sdk/v3/tools/document_converters/docx_converter.rb
175
+ - lib/swarm_sdk/v3/tools/document_converters/pdf_converter.rb
176
+ - lib/swarm_sdk/v3/tools/document_converters/xlsx_converter.rb
170
177
  - lib/swarm_sdk/v3/tools/edit.rb
171
178
  - lib/swarm_sdk/v3/tools/glob.rb
172
179
  - lib/swarm_sdk/v3/tools/grep.rb