swarm_sdk 3.0.0.alpha1 → 3.0.0.alpha2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/swarm_sdk/v3/tools/document_converters/base.rb +84 -0
- data/lib/swarm_sdk/v3/tools/document_converters/docx_converter.rb +120 -0
- data/lib/swarm_sdk/v3/tools/document_converters/pdf_converter.rb +111 -0
- data/lib/swarm_sdk/v3/tools/document_converters/xlsx_converter.rb +128 -0
- data/lib/swarm_sdk/v3/tools/read.rb +34 -2
- metadata +6 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 4feb796b3437eea5e610a13ed80923a4d2bda9841fc164d44cb330a5da20e97e
|
|
4
|
+
data.tar.gz: 3006b77e78aedd12c2371300d6599870aef32f9bf33285c7229ce9eadd453364
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 87bfebe06e4c043c131a6d51d2effe8cd7de7893075593b2b008126887f5837850486c8e354f918ef2fa283c9d286638a25efc632917f0a86ab57815102333ce
|
|
7
|
+
data.tar.gz: e2afe829f9c2394de45c07eb2b6ff692446ee2108d0b615a375447a87c797bd59b4f0745d96b1c7a8a40b7ef350e167b29c247a7a376ea366feb370ea0eaac1a
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SwarmSDK
|
|
4
|
+
module V3
|
|
5
|
+
module Tools
|
|
6
|
+
module DocumentConverters
|
|
7
|
+
# Abstract superclass shared by every document converter.
#
# A concrete converter describes itself through three class-level hooks
# (.gem_name, .format_name, .extensions) and implements #convert.
# The helpers for the "gem not installed" reminder and for error strings
# are defined here so all converters report problems the same way.
class Base
  # Name of the gem this converter depends on.
  #
  # @return [String] the gem name
  # @raise [NotImplementedError] if not implemented by subclass
  def self.gem_name
    raise NotImplementedError, "#{name} must implement .gem_name"
  end

  # Human-readable name of the handled format.
  #
  # @return [String] the format name (e.g., "PDF", "DOCX")
  # @raise [NotImplementedError] if not implemented by subclass
  def self.format_name
    raise NotImplementedError, "#{name} must implement .format_name"
  end

  # File extensions handled by this converter.
  #
  # @return [Array<String>] extensions including the dot (e.g., [".pdf"])
  # @raise [NotImplementedError] if not implemented by subclass
  def self.extensions
    raise NotImplementedError, "#{name} must implement .extensions"
  end

  # Tell whether the gem named by .gem_name can be found by RubyGems.
  #
  # @return [Boolean] true if a matching gem specification exists
  def self.available?
    begin
      Gem::Specification.find_by_name(gem_name)
    rescue Gem::MissingSpecError
      return false
    end

    true
  end

  # Convert a document to text (possibly with image attachments).
  #
  # @param file_path [String] absolute path to document
  # @return [String, RubyLLM::Content] text or text with image attachments
  # @raise [NotImplementedError] if not implemented by subclass
  def convert(file_path)
    raise NotImplementedError, "#{self.class.name} must implement #convert"
  end

  protected

  # Build the reminder returned when the required gem is missing.
  #
  # @return [String] formatted system reminder message
  def unsupported_format_message
    converter = self.class
    label = converter.format_name

    [
      "<system-reminder>",
      "This is a #{label} document, but the required gem is not installed.",
      "",
      "To enable #{label} reading:",
      "gem install #{converter.gem_name}",
      "</system-reminder>",
    ].join("\n")
  end

  # Prefix a description with the marker used for conversion errors.
  #
  # @param message [String] error description
  # @return [String] formatted error message
  def error(message)
    "Error: " + message.to_s
  end
end
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
end
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SwarmSDK
|
|
4
|
+
module V3
|
|
5
|
+
module Tools
|
|
6
|
+
module DocumentConverters
|
|
7
|
+
# DOCX document converter
#
# Converts DOCX files to text and extracts embedded raster images
# (PNG, JPEG, GIF) from the archive's word/media/ folder.
# Requires the docx gem (which includes rubyzip).
class DocxConverter < Base
  # Extensions of embedded media files that are returned as attachments.
  IMAGE_EXTENSIONS = [".png", ".jpg", ".jpeg", ".gif"].freeze

  class << self
    # @return [String] gem name
    def gem_name
      "docx"
    end

    # @return [String] format name
    def format_name
      "DOCX"
    end

    # @return [Array<String>] supported extensions
    def extensions
      [".docx"]
    end
  end

  # Convert DOCX to text with optional image attachments
  #
  # Returns a "<system-reminder>" string when the docx gem is missing and an
  # "Error: ..." string when the file is a legacy .doc or conversion fails.
  #
  # @param file_path [String] path to DOCX file
  # @return [String, RubyLLM::Content] text or content with images
  def convert(file_path)
    # Checked first and case-insensitively: installing the docx gem would not
    # make a legacy .doc readable, so the gem hint would be misleading here.
    return error("Legacy .doc format not supported") if file_path.to_s.downcase.end_with?(".doc")
    return unsupported_format_message unless self.class.available?

    require "docx"
    doc = Docx::Document.open(file_path)

    output = build_text_output(doc, file_path)
    image_paths = extract_images(file_path)
    return output if image_paths.empty?

    content = RubyLLM::Content.new(output)
    image_paths.each { |path| content.add_attachment(path) }
    content
  rescue StandardError => e
    error("DOCX conversion failed: #{e.message}")
  end

  private

  # Build text output from DOCX document
  #
  # Emits a header, then every non-blank paragraph, then each table with
  # its cells joined by " | ".
  #
  # @param doc [Docx::Document] opened document
  # @param file_path [String] original file path
  # @return [String] formatted text output
  def build_text_output(doc, file_path)
    output = ["DOCX: #{File.basename(file_path)}", "=" * 60, ""]

    doc.paragraphs.each do |para|
      text = para.text.strip
      output << text unless text.empty?
    end

    doc.tables.each_with_index do |table, idx|
      output.push("", "Table #{idx + 1}:", "-" * 40)
      table.rows.each do |row|
        output << row.cells.map(&:text).join(" | ")
      end
    end

    output.join("\n")
  end

  # Extract images from DOCX ZIP (word/media/)
  #
  # Extraction is best effort: any failure yields an empty list so the text
  # conversion still succeeds. The temporary directory is kept only when it
  # actually holds extracted images (the caller attaches them by path).
  #
  # @param docx_path [String] path to DOCX file
  # @return [Array<String>] paths to extracted image files
  def extract_images(docx_path)
    require "zip"
    require "tmpdir" # Dir.mktmpdir is defined by the tmpdir library
    require "fileutils"

    temp_dir = Dir.mktmpdir("docx_#{Process.pid}")
    images = []

    Zip::File.open(docx_path) do |zip|
      zip.each do |entry|
        next unless entry.name.start_with?("word/media/")
        next unless IMAGE_EXTENSIONS.include?(File.extname(entry.name).downcase)

        # basename keeps the write inside temp_dir whatever the entry name is.
        path = File.join(temp_dir, File.basename(entry.name))
        # Copy through the entry's stream instead of Entry#extract, whose
        # destination argument differs between rubyzip major versions.
        entry.get_input_stream { |io| File.binwrite(path, io.read) }
        images << path
      end
    end

    FileUtils.rm_rf(temp_dir) if images.empty?
    images
  rescue StandardError
    # Best effort: drop partial output and continue without images.
    FileUtils.rm_rf(temp_dir) if temp_dir
    []
  end
end
|
|
117
|
+
end
|
|
118
|
+
end
|
|
119
|
+
end
|
|
120
|
+
end
|
|
@@ -0,0 +1,111 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SwarmSDK
|
|
4
|
+
module V3
|
|
5
|
+
module Tools
|
|
6
|
+
module DocumentConverters
|
|
7
|
+
# PDF document converter
#
# Converts PDF files to text and extracts JPEG images.
# Requires the pdf-reader gem.
class PdfConverter < Base
  class << self
    # @return [String] gem name
    def gem_name
      "pdf-reader"
    end

    # @return [String] format name
    def format_name
      "PDF"
    end

    # @return [Array<String>] supported extensions
    def extensions
      [".pdf"]
    end
  end

  # Convert PDF to text with optional image attachments
  #
  # Returns a "<system-reminder>" string when pdf-reader is missing and an
  # "Error: ..." string when the PDF is malformed or conversion fails.
  #
  # @param file_path [String] path to PDF file
  # @return [String, RubyLLM::Content] text or content with images
  def convert(file_path)
    return unsupported_format_message unless self.class.available?

    require "pdf-reader"
    reader = PDF::Reader.new(file_path)

    output = build_text_output(reader, file_path)
    image_paths = extract_jpeg_images(reader)
    return output if image_paths.empty?

    content = RubyLLM::Content.new(output)
    image_paths.each { |path| content.add_attachment(path) }
    content
  rescue PDF::Reader::MalformedPDFError => e
    error("Malformed PDF: #{e.message}")
  rescue StandardError => e
    error("PDF conversion failed: #{e.message}")
  end

  private

  # Build text output from PDF pages
  #
  # Emits a header with the page count, then one section per page;
  # pages without extractable text are shown as "(No text)".
  #
  # @param reader [PDF::Reader] initialized reader
  # @param file_path [String] original file path
  # @return [String] formatted text output
  def build_text_output(reader, file_path)
    output = [
      "PDF: #{File.basename(file_path)}",
      "=" * 60,
      "Pages: #{reader.page_count}",
      "",
    ]

    reader.pages.each_with_index do |page, idx|
      text = page.text.strip
      output.push("Page #{idx + 1}:", "-" * 60, text.empty? ? "(No text)" : text, "")
    end

    output.join("\n")
  end

  # Extract JPEG images only (LLM API compatible)
  #
  # Extraction is best effort: any failure yields an empty list so the text
  # conversion still succeeds. The temporary directory is kept only when it
  # actually holds extracted images (the caller attaches them by path).
  #
  # @param reader [PDF::Reader] initialized reader
  # @return [Array<String>] paths to extracted JPEG files
  def extract_jpeg_images(reader)
    require "tmpdir" # Dir.mktmpdir is defined by the tmpdir library
    require "fileutils"

    temp_dir = Dir.mktmpdir("pdf_#{Process.pid}")
    images = []

    reader.pages.each_with_index do |page, page_num|
      page.xobjects.each do |name, stream|
        next unless jpeg_image?(stream)

        # XObject names come from the document; keep them filename-safe.
        safe_name = name.to_s.gsub(/[^\w.-]/, "_")
        path = File.join(temp_dir, "p#{page_num + 1}_#{safe_name}.jpg")
        File.binwrite(path, stream.data)
        images << path
      end
    end

    FileUtils.rm_rf(temp_dir) if images.empty?
    images
  rescue StandardError
    # Best effort: drop partial output and continue without images.
    FileUtils.rm_rf(temp_dir) if temp_dir
    []
  end

  # Tell whether an XObject is an image whose only filter is DCTDecode,
  # i.e. whose raw stream bytes can be written out as a .jpg file.
  #
  # @param stream [Object] value from PDF::Reader::Page#xobjects
  # @return [Boolean] true for a stand-alone JPEG image stream
  def jpeg_image?(stream)
    # Every Ruby object responds to #hash (returning an Integer); only objects
    # that carry a dictionary Hash and raw #data are treated as streams.
    return false unless stream.respond_to?(:data)

    dictionary = stream.hash
    return false unless dictionary.is_a?(Hash)
    return false unless dictionary[:Subtype] == :Image

    # /Filter may be a single name or an array; a chain with additional
    # filters would leave the raw bytes still encoded, so it is skipped.
    Array(dictionary[:Filter]) == [:DCTDecode]
  end
end
|
|
108
|
+
end
|
|
109
|
+
end
|
|
110
|
+
end
|
|
111
|
+
end
|
|
@@ -0,0 +1,128 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SwarmSDK
|
|
4
|
+
module V3
|
|
5
|
+
module Tools
|
|
6
|
+
module DocumentConverters
|
|
7
|
+
# XLSX/Spreadsheet converter
#
# Converts spreadsheet files (XLSX, XLS, ODS) to CSV format.
# Requires the roo gem (and roo-xls for legacy XLS support).
class XlsxConverter < Base
  class << self
    # @return [String] gem name
    def gem_name
      "roo"
    end

    # @return [String] format name
    def format_name
      "XLSX/Spreadsheet"
    end

    # @return [Array<String>] supported extensions
    def extensions
      [".xlsx", ".xls", ".ods"]
    end
  end

  # Convert spreadsheet to CSV text format
  #
  # Returns a "<system-reminder>" string when a required gem is missing and
  # an "Error: ..." string when conversion fails.
  #
  # @param file_path [String] path to spreadsheet file
  # @return [String] CSV formatted text
  def convert(file_path)
    return unsupported_format_message unless self.class.available?

    # Case-insensitive so "REPORT.XLS" takes the same path as "report.xls".
    legacy_xls = file_path.to_s.downcase.end_with?(".xls")
    return unsupported_xls_message if legacy_xls && !xls_available?

    require "roo"
    # roo-xls must be loaded, not just installed, for Roo to open .xls files.
    require "roo-xls" if legacy_xls
    require "csv"

    spreadsheet = Roo::Spreadsheet.open(file_path)
    build_csv_output(spreadsheet, file_path)
  rescue StandardError => e
    error("Spreadsheet conversion failed: #{e.message}")
  ensure
    # Release the workbook's resources once the text has been built.
    spreadsheet.close if spreadsheet.respond_to?(:close)
  end

  private

  # Check if roo-xls gem is available for legacy XLS support
  #
  # @return [Boolean] true if roo-xls is installed
  def xls_available?
    Gem::Specification.find_by_name("roo-xls")
    true
  rescue Gem::MissingSpecError
    false
  end

  # Return system reminder for missing roo-xls gem
  #
  # @return [String] formatted system reminder
  def unsupported_xls_message
    <<~MSG.strip
      <system-reminder>
      Legacy XLS format requires additional gem.

      To enable XLS support:
      gem install roo-xls

      Or save as .xlsx format.
      </system-reminder>
    MSG
  end

  # Build CSV output from spreadsheet
  #
  # Emits a file header, then for every sheet a summary line followed by
  # one CSV line per row.
  #
  # @param spreadsheet [Roo::Base] opened spreadsheet
  # @param file_path [String] original file path
  # @return [String] formatted CSV output
  def build_csv_output(spreadsheet, file_path)
    output = ["Spreadsheet: #{File.basename(file_path)}", "=" * 60, ""]

    spreadsheet.sheets.each do |sheet_name|
      spreadsheet.default_sheet = sheet_name
      rows = spreadsheet.last_row || 0
      cols = spreadsheet.last_column || 0

      output << "Sheet: #{sheet_name} (#{rows} rows × #{cols} cols)"
      output << "-" * 60

      each_formatted_row(spreadsheet) do |cells|
        output << CSV.generate_line(cells).chomp
      end

      output << ""
    end

    output.join("\n")
  end

  # Yield every row of the current default sheet as an array of strings.
  #
  # Uses row streaming when the reader offers it; other readers are walked
  # cell by cell through the generic Roo accessors.
  #
  # @param spreadsheet [Roo::Base] opened spreadsheet
  # @yieldparam cells [Array<String>] formatted cell values of one row
  # @return [void]
  def each_formatted_row(spreadsheet)
    if spreadsheet.respond_to?(:each_row_streaming)
      # pad_cells keeps empty cells so values stay in their CSV columns.
      spreadsheet.each_row_streaming(pad_cells: true) do |row|
        yield row.map { |cell| format_cell(cell) }
      end
      return
    end

    first_row = spreadsheet.first_row
    last_row = spreadsheet.last_row
    last_column = spreadsheet.last_column
    return if first_row.nil? || last_row.nil? || last_column.nil? # empty sheet

    (first_row..last_row).each do |row_number|
      # Start at column 1 so this path is column-aligned like the padded one.
      yield (1..last_column).map { |column|
        format_value(spreadsheet.cell(row_number, column), spreadsheet.celltype(row_number, column))
      }
    end
  end

  # Format cell based on type
  #
  # @param cell [Roo::Excelx::Cell::Base, nil] cell to format (nil for padding)
  # @return [String] formatted cell value
  def format_cell(cell)
    return "" if cell.nil?

    format_value(cell.value, cell.type)
  end

  # Render a raw cell value according to its Roo cell type.
  #
  # Values that do not have the shape a type suggests fall back to #to_s
  # so one unusual cell cannot abort the whole conversion.
  #
  # @param value [Object, nil] raw cell value
  # @param type [Symbol, nil] Roo cell type (:string, :float, :date, ...)
  # @return [String] formatted value ("" for nil)
  def format_value(value, type)
    return "" if value.nil?

    case type
    when :date then format_temporal(value, "%Y-%m-%d")
    when :datetime then format_temporal(value, "%Y-%m-%d %H:%M:%S")
    when :time then format_time(value)
    when :boolean then format_boolean(value)
    when :percentage
      value.is_a?(Numeric) ? "#{(value * 100).round(2)}%" : value.to_s
    else
      # :string, :float, :number, :formula (calculated value) and anything else
      value.to_s
    end
  end

  # @param value [Object] date-like value
  # @param pattern [String] strftime pattern
  # @return [String] formatted value, or value.to_s when it is not date-like
  def format_temporal(value, pattern)
    value.respond_to?(:strftime) ? value.strftime(pattern) : value.to_s
  end

  # Roo documents :time values as seconds since midnight; time-like objects
  # are accepted as well.
  #
  # @param value [Object] time value
  # @return [String] "HH:MM:SS", or value.to_s for other shapes
  def format_time(value)
    return value.strftime("%H:%M:%S") if value.respond_to?(:strftime)
    return value.to_s unless value.is_a?(Numeric)

    seconds = value.round
    format("%02d:%02d:%02d", seconds / 3600, (seconds % 3600) / 60, seconds % 60)
  end

  # Treats the strings "true" and "1" as true as well, since a non-empty
  # string such as "false" is truthy in Ruby.
  #
  # @param value [Object] boolean-like value
  # @return [String] "TRUE" or "FALSE"
  def format_boolean(value)
    truthy = value == true || ["true", "1"].include?(value.to_s.downcase)
    truthy ? "TRUE" : "FALSE"
  end
end
|
|
125
|
+
end
|
|
126
|
+
end
|
|
127
|
+
end
|
|
128
|
+
end
|
|
@@ -7,7 +7,15 @@ module SwarmSDK
|
|
|
7
7
|
#
|
|
8
8
|
# Supports reading entire files or specific line ranges with line numbers.
|
|
9
9
|
# Tracks reads per agent for enforcing read-before-write/edit rules.
|
|
10
|
+
# Supports document formats (PDF, DOCX, XLSX) if gems installed.
|
|
10
11
|
class Read < Base
|
|
12
|
+
# Document converters (optional gems)
|
|
13
|
+
CONVERTERS = [
|
|
14
|
+
DocumentConverters::PdfConverter,
|
|
15
|
+
DocumentConverters::DocxConverter,
|
|
16
|
+
DocumentConverters::XlsxConverter,
|
|
17
|
+
].freeze
|
|
18
|
+
|
|
11
19
|
class << self
|
|
12
20
|
# @return [Array<Symbol>] Constructor requirements
|
|
13
21
|
def creation_requirements
|
|
@@ -19,6 +27,7 @@ module SwarmSDK
|
|
|
19
27
|
Reads a file from the local filesystem.
|
|
20
28
|
|
|
21
29
|
Supports text files with line numbers. Binary files (images) are returned as visual content.
|
|
30
|
+
Supports document formats (PDF, DOCX, XLSX) if gems installed.
|
|
22
31
|
|
|
23
32
|
Path handling:
|
|
24
33
|
- Relative paths resolve against your working directory
|
|
@@ -32,12 +41,12 @@ module SwarmSDK
|
|
|
32
41
|
|
|
33
42
|
param :offset,
|
|
34
43
|
type: "integer",
|
|
35
|
-
desc: "Line number to start reading from (1-indexed). Use for large files.",
|
|
44
|
+
desc: "Line number to start reading from (1-indexed). Use for large text files. Ignored for documents.",
|
|
36
45
|
required: false
|
|
37
46
|
|
|
38
47
|
param :limit,
|
|
39
48
|
type: "integer",
|
|
40
|
-
desc: "Number of lines to read. Use for large files.",
|
|
49
|
+
desc: "Number of lines to read. Use for large text files. Ignored for documents.",
|
|
41
50
|
required: false
|
|
42
51
|
|
|
43
52
|
# @param agent_name [Symbol, String] Agent identifier for read tracking
|
|
@@ -72,6 +81,20 @@ module SwarmSDK
|
|
|
72
81
|
return validation_error("File does not exist: #{file_path}") unless File.exist?(resolved_path)
|
|
73
82
|
return validation_error("Path is a directory. Use Bash with ls to list directories.") if File.directory?(resolved_path)
|
|
74
83
|
|
|
84
|
+
# Try document converter first
|
|
85
|
+
converter_class = find_converter(resolved_path)
|
|
86
|
+
if converter_class
|
|
87
|
+
result = converter_class.new.convert(resolved_path)
|
|
88
|
+
|
|
89
|
+
# Register read for successful conversions
|
|
90
|
+
unless result.start_with?("<system-reminder>") || result.start_with?("Error:")
|
|
91
|
+
@read_tracker.register_read(@agent_name, resolved_path)
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
return result
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
# Standard text file handling
|
|
75
98
|
content = read_file_content(resolved_path)
|
|
76
99
|
|
|
77
100
|
# Binary file — return as-is
|
|
@@ -175,6 +198,15 @@ module SwarmSDK
|
|
|
175
198
|
"\n\n<system-reminder>This file has #{total_lines} lines but only the first #{limit} are shown. " \
|
|
176
199
|
"Use offset and limit parameters to read more.</system-reminder>"
|
|
177
200
|
end
|
|
201
|
+
|
|
202
|
+
# Look up the document converter registered for a file's extension.
#
# The extension is compared in lower case, so "REPORT.PDF" and
# "report.pdf" select the same converter.
#
# @param file_path [String] Resolved file path
# @return [Class, nil] Converter class or nil if no match
def find_converter(file_path)
  suffix = File.extname(file_path).downcase

  CONVERTERS.each do |converter|
    return converter if converter.extensions.include?(suffix)
  end

  nil
end
|
|
178
210
|
end
|
|
179
211
|
end
|
|
180
212
|
end
|
metadata
CHANGED
|
@@ -1,13 +1,13 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: swarm_sdk
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 3.0.0.
|
|
4
|
+
version: 3.0.0.alpha2
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Paulo Arruda
|
|
8
8
|
bindir: bin
|
|
9
9
|
cert_chain: []
|
|
10
|
-
date: 1980-01-
|
|
10
|
+
date: 1980-01-01 00:00:00.000000000 Z
|
|
11
11
|
dependencies:
|
|
12
12
|
- !ruby/object:Gem::Dependency
|
|
13
13
|
name: async
|
|
@@ -167,6 +167,10 @@ files:
|
|
|
167
167
|
- lib/swarm_sdk/v3/tools/base.rb
|
|
168
168
|
- lib/swarm_sdk/v3/tools/bash.rb
|
|
169
169
|
- lib/swarm_sdk/v3/tools/clock.rb
|
|
170
|
+
- lib/swarm_sdk/v3/tools/document_converters/base.rb
|
|
171
|
+
- lib/swarm_sdk/v3/tools/document_converters/docx_converter.rb
|
|
172
|
+
- lib/swarm_sdk/v3/tools/document_converters/pdf_converter.rb
|
|
173
|
+
- lib/swarm_sdk/v3/tools/document_converters/xlsx_converter.rb
|
|
170
174
|
- lib/swarm_sdk/v3/tools/edit.rb
|
|
171
175
|
- lib/swarm_sdk/v3/tools/glob.rb
|
|
172
176
|
- lib/swarm_sdk/v3/tools/grep.rb
|