swarm_sdk 2.0.0.pre.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/swarm_sdk/agent/builder.rb +333 -0
- data/lib/swarm_sdk/agent/chat/context_tracker.rb +271 -0
- data/lib/swarm_sdk/agent/chat/hook_integration.rb +372 -0
- data/lib/swarm_sdk/agent/chat/logging_helpers.rb +99 -0
- data/lib/swarm_sdk/agent/chat/system_reminder_injector.rb +114 -0
- data/lib/swarm_sdk/agent/chat.rb +779 -0
- data/lib/swarm_sdk/agent/context.rb +108 -0
- data/lib/swarm_sdk/agent/definition.rb +335 -0
- data/lib/swarm_sdk/configuration.rb +251 -0
- data/lib/swarm_sdk/context_compactor/metrics.rb +147 -0
- data/lib/swarm_sdk/context_compactor/token_counter.rb +106 -0
- data/lib/swarm_sdk/context_compactor.rb +340 -0
- data/lib/swarm_sdk/hooks/adapter.rb +359 -0
- data/lib/swarm_sdk/hooks/context.rb +163 -0
- data/lib/swarm_sdk/hooks/definition.rb +80 -0
- data/lib/swarm_sdk/hooks/error.rb +29 -0
- data/lib/swarm_sdk/hooks/executor.rb +146 -0
- data/lib/swarm_sdk/hooks/registry.rb +143 -0
- data/lib/swarm_sdk/hooks/result.rb +150 -0
- data/lib/swarm_sdk/hooks/shell_executor.rb +254 -0
- data/lib/swarm_sdk/hooks/tool_call.rb +35 -0
- data/lib/swarm_sdk/hooks/tool_result.rb +62 -0
- data/lib/swarm_sdk/log_collector.rb +83 -0
- data/lib/swarm_sdk/log_stream.rb +69 -0
- data/lib/swarm_sdk/markdown_parser.rb +46 -0
- data/lib/swarm_sdk/permissions/config.rb +239 -0
- data/lib/swarm_sdk/permissions/error_formatter.rb +121 -0
- data/lib/swarm_sdk/permissions/path_matcher.rb +35 -0
- data/lib/swarm_sdk/permissions/validator.rb +173 -0
- data/lib/swarm_sdk/permissions_builder.rb +122 -0
- data/lib/swarm_sdk/prompts/base_system_prompt.md.erb +237 -0
- data/lib/swarm_sdk/providers/openai_with_responses.rb +582 -0
- data/lib/swarm_sdk/result.rb +97 -0
- data/lib/swarm_sdk/swarm/agent_initializer.rb +224 -0
- data/lib/swarm_sdk/swarm/all_agents_builder.rb +62 -0
- data/lib/swarm_sdk/swarm/builder.rb +240 -0
- data/lib/swarm_sdk/swarm/mcp_configurator.rb +151 -0
- data/lib/swarm_sdk/swarm/tool_configurator.rb +267 -0
- data/lib/swarm_sdk/swarm.rb +837 -0
- data/lib/swarm_sdk/tools/bash.rb +274 -0
- data/lib/swarm_sdk/tools/delegate.rb +152 -0
- data/lib/swarm_sdk/tools/document_converters/base_converter.rb +83 -0
- data/lib/swarm_sdk/tools/document_converters/docx_converter.rb +99 -0
- data/lib/swarm_sdk/tools/document_converters/pdf_converter.rb +78 -0
- data/lib/swarm_sdk/tools/document_converters/xlsx_converter.rb +194 -0
- data/lib/swarm_sdk/tools/edit.rb +150 -0
- data/lib/swarm_sdk/tools/glob.rb +158 -0
- data/lib/swarm_sdk/tools/grep.rb +231 -0
- data/lib/swarm_sdk/tools/image_extractors/docx_image_extractor.rb +43 -0
- data/lib/swarm_sdk/tools/image_extractors/pdf_image_extractor.rb +163 -0
- data/lib/swarm_sdk/tools/image_formats/tiff_builder.rb +65 -0
- data/lib/swarm_sdk/tools/multi_edit.rb +232 -0
- data/lib/swarm_sdk/tools/path_resolver.rb +43 -0
- data/lib/swarm_sdk/tools/read.rb +251 -0
- data/lib/swarm_sdk/tools/registry.rb +73 -0
- data/lib/swarm_sdk/tools/scratchpad_list.rb +88 -0
- data/lib/swarm_sdk/tools/scratchpad_read.rb +59 -0
- data/lib/swarm_sdk/tools/scratchpad_write.rb +88 -0
- data/lib/swarm_sdk/tools/stores/read_tracker.rb +61 -0
- data/lib/swarm_sdk/tools/stores/scratchpad.rb +153 -0
- data/lib/swarm_sdk/tools/stores/todo_manager.rb +65 -0
- data/lib/swarm_sdk/tools/todo_write.rb +216 -0
- data/lib/swarm_sdk/tools/write.rb +117 -0
- data/lib/swarm_sdk/utils.rb +50 -0
- data/lib/swarm_sdk/version.rb +5 -0
- data/lib/swarm_sdk.rb +69 -0
- metadata +169 -0
@@ -0,0 +1,231 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SwarmSDK
|
4
|
+
module Tools
|
5
|
+
# Grep tool for searching file contents using ripgrep-style patterns
|
6
|
+
#
|
7
|
+
# Powerful search capabilities with regex support, context lines, and filtering.
|
8
|
+
# Built on ripgrep (rg) for fast, efficient searching.
|
9
|
+
class Grep < RubyLLM::Tool
  include PathResolver

  # Output modes accepted by #execute, in the order they are listed in validation errors.
  VALID_OUTPUT_MODES = ["content", "files_with_matches", "count"].freeze

  # Context flags forwarded to ripgrep (content mode only), in command-line order.
  CONTEXT_FLAGS = ["-B", "-A", "-C"].freeze

  # @param directory [String] Directory searched when no path is given; stored as an absolute path
  def initialize(directory:)
    super()
    @directory = File.expand_path(directory)
  end

  define_method(:name) { "Grep" }

  description <<~DESC
    A powerful search tool built on ripgrep

    Usage:
    - ALWAYS use Grep for search tasks. NEVER invoke `grep` or `rg` as a Bash command. The Grep tool has been optimized for correct permissions and access.
    - Supports full regex syntax (e.g., "log.*Error", "function\\s+\\w+")
    - Filter files with glob parameter (e.g., "*.js", "**/*.tsx") or type parameter (e.g., "js", "py", "rust")
    - Output modes: "content" shows matching lines, "files_with_matches" shows only file paths (default), "count" shows match counts
    - Use Task tool for open-ended searches requiring multiple rounds
    - Pattern syntax: Uses ripgrep (not grep) - literal braces need escaping (use `interface\\{\\}` to find `interface{}` in Go code)
    - Multiline matching: By default patterns match within single lines only. For cross-line patterns like `struct \\{[\\s\\S]*?field`, use `multiline: true`
  DESC

  param :pattern,
    type: "string",
    desc: "The regular expression pattern to search for in file contents",
    required: true

  param :path,
    type: "string",
    desc: "File or directory to search in (rg PATH). Defaults to current working directory.",
    required: false

  param :glob,
    type: "string",
    desc: "Glob pattern to filter files (e.g. \"*.js\", \"*.{ts,tsx}\") - maps to rg --glob",
    required: false

  param :type,
    type: "string",
    desc: "File type to search (rg --type). Common types: js, py, rust, go, java, etc.",
    required: false

  param :output_mode,
    type: "string",
    desc: "Output mode: \"content\" shows matching lines (supports -A/-B/-C context, -n line numbers, head_limit), \"files_with_matches\" shows file paths (supports head_limit), \"count\" shows match counts (supports head_limit). Defaults to \"files_with_matches\".",
    required: false

  param :"-i",
    type: "boolean",
    desc: "Case insensitive search (rg -i)",
    required: false

  param :multiline,
    type: "boolean",
    desc: "Enable multiline mode where . matches newlines and patterns can span lines (rg -U --multiline-dotall)",
    required: false

  param :"-B",
    type: "integer",
    desc: "Number of lines to show before each match (rg -B). Requires output_mode: \"content\", ignored otherwise.",
    required: false

  param :"-A",
    type: "integer",
    desc: "Number of lines to show after each match (rg -A). Requires output_mode: \"content\", ignored otherwise.",
    required: false

  param :"-C",
    type: "integer",
    desc: "Number of lines to show before and after each match (rg -C). Requires output_mode: \"content\", ignored otherwise.",
    required: false

  param :"-n",
    type: "boolean",
    desc: "Show line numbers in output (rg -n). Requires output_mode: \"content\", ignored otherwise.",
    required: false

  param :head_limit,
    type: "integer",
    desc: "Limit output to first N lines/entries, equivalent to \"| head -N\". Works across all output modes: content (limits output lines), files_with_matches (limits file paths), count (limits count entries). When unspecified, shows all results from ripgrep.",
    required: false

  # Run ripgrep and return its (optionally truncated) output as text.
  #
  # Never raises for StandardError: every failure is reported as a String so
  # the calling agent can read it.
  #
  # @param pattern [String] Regular expression to search for (required, non-empty)
  # @param path [String, nil] File or directory to search; blank means the agent directory
  # @param glob [String, nil] Value for rg --glob; blank values are ignored
  # @param type [String, nil] Value for rg --type; blank values are ignored
  # @param output_mode [String] One of VALID_OUTPUT_MODES
  # @param options [Hash] Remaining flags: "-i", "-n", "-A", "-B", "-C", :multiline, :head_limit
  # @return [String] Search output, a "No matches found" notice, or an error message
  def execute(
    pattern:,
    path: nil,
    glob: nil,
    type: nil,
    output_mode: "files_with_matches",
    **options
  )
    return validation_error("pattern is required") if pattern.nil? || pattern.empty?

    # CRITICAL: default to the agent's directory, NOT the process working directory.
    search_path = if path.nil? || path.to_s.strip.empty?
      @directory
    else
      resolve_path(path)
    end

    unless VALID_OUTPUT_MODES.include?(output_mode)
      return validation_error("output_mode must be one of: #{VALID_OUTPUT_MODES.join(", ")}")
    end

    # Tool arguments may arrive as JSON strings ("10"); coerce before comparing with Integers.
    raw_limit = fetch_option(options, :head_limit)
    head_limit = raw_limit.nil? ? nil : Integer(raw_limit, exception: false)
    return validation_error("head_limit must be an integer") if !raw_limit.nil? && head_limit.nil?

    cmd = build_command(pattern, search_path, glob, type, output_mode, options)
    run_search(cmd, pattern, search_path, output_mode, head_limit)
  rescue StandardError => e
    error("Unexpected error during search: #{e.class.name} - #{e.message}")
  end

  private

  # Look up a tool option by name.
  #
  # `**options` sits next to keyword parameters such as `pattern:`, so callers
  # necessarily pass Symbol keys (:"-i", :"-n", ...). The String form is kept
  # as a fallback for any caller that supplies String keys.
  #
  # @param options [Hash] Extra keyword arguments given to #execute
  # @param key [String, Symbol] Option name, e.g. "-i" or :head_limit
  # @return [Object, nil] The option value, or nil when absent
  def fetch_option(options, key)
    options.fetch(key.to_sym) { options[key.to_s] }
  end

  # Assemble the ripgrep argument vector (no shell is involved).
  #
  # @return [Array<String>] Command and arguments for Open3.capture3
  def build_command(pattern, search_path, glob, type, output_mode, options)
    cmd = ["rg"]

    case output_mode
    when "files_with_matches"
      cmd << "-l" # List files with matches
    when "count"
      cmd << "-c" # Count matches per file
    when "content"
      # Line numbers and context lines only make sense when lines are printed.
      cmd << "-n" if fetch_option(options, "-n")
      CONTEXT_FLAGS.each do |flag|
        value = fetch_option(options, flag)
        cmd << flag << value.to_s if value
      end
    end

    cmd << "-i" if fetch_option(options, "-i")
    cmd << "-U" << "--multiline-dotall" if fetch_option(options, :multiline)

    # File filtering (only added when non-blank)
    cmd << "--type" << type if type && !type.to_s.strip.empty?
    cmd << "--glob" << glob if glob && !glob.to_s.strip.empty?

    # -e keeps patterns that start with "-" from being parsed as flags.
    cmd << "-e" << pattern
    cmd << search_path
  end

  # Execute ripgrep and turn its exit status and streams into a result string.
  #
  # @return [String] Formatted output or an error message
  def run_search(cmd, pattern, search_path, output_mode, head_limit)
    require "open3"

    stdout, stderr, status = Open3.capture3(*cmd)
    no_matches = "No matches found for pattern: #{pattern}"

    # ripgrep exits with 1 when nothing matched.
    return no_matches if status.exitstatus == 1 && stderr.empty?

    # Exit code 2, or anything written to stderr, is treated as an error.
    return error("ripgrep error: #{stderr}") if status.exitstatus == 2 || !stderr.empty?

    # Check emptiness before decorating, otherwise the usage reminder hides an empty result.
    return no_matches if stdout.empty?

    output = apply_head_limit(stdout, head_limit)
    reminder = build_usage_reminder(output_mode, pattern)
    reminder.empty? ? output : "#{output}\n\n#{reminder}"
  rescue Errno::ENOENT
    error("ripgrep (rg) is not installed or not in PATH. Please install ripgrep to use the Grep tool.")
  rescue Errno::EACCES
    error("Permission denied: Cannot search in '#{search_path}'")
  rescue StandardError => e
    error("Failed to execute search: #{e.class.name} - #{e.message}")
  end

  # Keep only the first `head_limit` lines and append a truncation notice.
  #
  # @param output [String] Raw ripgrep output
  # @param head_limit [Integer, nil] Maximum number of lines; nil or <= 0 disables the limit
  # @return [String] Possibly truncated output
  def apply_head_limit(output, head_limit)
    return output unless head_limit && head_limit > 0

    lines = output.lines
    return output if lines.count <= head_limit

    "#{lines.take(head_limit).join}\n\n<system-reminder>Output limited to first #{head_limit} lines. Total results: #{lines.count} lines.</system-reminder>"
  end

  # @param message [String] Description of the invalid input
  # @return [String] Validation error wrapped in <tool_use_error> tags
  def validation_error(message)
    "<tool_use_error>InputValidationError: #{message}</tool_use_error>"
  end

  # @param message [String] Failure description
  # @return [String] Message prefixed with "Error: "
  def error(message)
    "Error: #{message}"
  end

  # Hint appended to non-content results, pointing at output_mode: 'content'.
  #
  # @param output_mode [String] The mode that was used
  # @param pattern [String] Search pattern (currently unused; kept for signature stability)
  # @return [String] Reminder text, or "" for content mode
  def build_usage_reminder(output_mode, pattern)
    return "" if output_mode == "content"

    <<~REMINDER
      <system-reminder>
      You used output_mode: '#{output_mode}' which only shows #{output_mode == "files_with_matches" ? "file paths" : "match counts"}.
      To see the actual matching lines and their content, use output_mode: 'content'.
      You can also add -n: true and context lines (-B, -A, or -C) for better context.
      </system-reminder>
    REMINDER
  end
end
|
230
|
+
end
|
231
|
+
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SwarmSDK
|
4
|
+
module Tools
|
5
|
+
module ImageExtractors
|
6
|
+
# Extracts images from DOCX documents
|
7
|
+
# DOCX files are ZIP archives with images stored in word/media/
|
8
|
+
class DocxImageExtractor
  # Archive entries whose name ends in one of these extensions are treated as images.
  IMAGE_EXTENSION = /\.(png|jpe?g|gif|bmp|tiff?)\z/i

  class << self
    # Extract all images from a DOCX document into a fresh temporary directory.
    #
    # Best effort: any failure yields an empty array instead of raising, so a
    # broken image never fails the surrounding document read. The temporary
    # directory is removed again when nothing was extracted; otherwise the
    # caller owns the returned files.
    #
    # @param doc [Docx::Document] The DOCX document instance (must expose #zip)
    # @param docx_path [String] Path to the DOCX file (used to name the temp directory)
    # @return [Array<String>] Array of temporary file paths containing extracted images
    def extract_images(doc, docx_path)
      require "fileutils"
      require "tmpdir"

      image_paths = []
      temp_dir = Dir.mktmpdir("docx_images_#{File.basename(docx_path, ".*")}")

      # DOCX files are ZIP archives with images in word/media/
      doc.zip.glob("word/media/*").each do |entry|
        next unless entry.file? && entry.name.match?(IMAGE_EXTENSION)

        # basename keeps the write inside temp_dir whatever the entry path looks like
        output_path = File.join(temp_dir, File.basename(entry.name))
        File.binwrite(output_path, doc.zip.read(entry.name))
        image_paths << output_path
      end

      remove_temp_dir(temp_dir) if image_paths.empty?
      image_paths
    rescue StandardError
      # Nothing is returned on failure, so do not leave partial files behind.
      remove_temp_dir(temp_dir)
      []
    end

    private

    # Delete a temporary directory, ignoring cleanup failures.
    # @param dir [String, nil] Directory to remove (nil when creation itself failed)
    # @return [void]
    def remove_temp_dir(dir)
      FileUtils.remove_entry(dir) if dir && File.directory?(dir)
    rescue StandardError
      nil
    end
  end
end
|
41
|
+
end
|
42
|
+
end
|
43
|
+
end
|
@@ -0,0 +1,163 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SwarmSDK
|
4
|
+
module Tools
|
5
|
+
module ImageExtractors
|
6
|
+
# Extracts images from PDF documents
|
7
|
+
# Supports JPEG (DCTDecode), FlateDecode, and LZWDecode formats
|
8
|
+
# Converts non-JPEG images to TIFF format
|
9
|
+
class PdfImageExtractor
  # Lossless stream filters whose decoded output is raw pixel data that can be wrapped in a TIFF.
  RAW_PIXEL_FILTERS = [:FlateDecode, :LZWDecode].freeze

  class << self
    # Extract all images from a PDF document into a fresh temporary directory.
    #
    # Best effort: any failure yields an empty array instead of raising, so a
    # broken image never fails the surrounding PDF read. The temporary
    # directory is removed again when nothing was extracted; otherwise the
    # caller owns the returned files.
    #
    # @param reader [PDF::Reader] The PDF reader instance
    # @param pdf_path [String] Path to the PDF file (used to name the temp directory)
    # @return [Array<String>] Array of temporary file paths containing extracted images
    def extract_images(reader, pdf_path)
      require "fileutils"
      require "tmpdir"

      image_paths = []
      temp_dir = Dir.mktmpdir("pdf_images_#{File.basename(pdf_path, ".*")}")

      reader.pages.each_with_index do |page, page_index|
        image_paths.concat(extract_from_page(page, page_index + 1, temp_dir))
      end

      remove_temp_dir(temp_dir) if image_paths.empty?
      image_paths
    rescue StandardError
      # Nothing is returned on failure, so do not leave partial files behind.
      remove_temp_dir(temp_dir)
      []
    end

    # Extract images from a single PDF page
    # @param page [PDF::Reader::Page] The PDF page
    # @param page_number [Integer] Page number (1-indexed)
    # @param temp_dir [String] Directory to save extracted images
    # @return [Array<String>] Array of file paths for extracted images
    def extract_from_page(page, page_number, temp_dir)
      extracted_files = []

      # XObjects are the page's external objects; only Image XObjects are saved (Form XObjects are skipped).
      page.xobjects.each do |name, stream|
        next unless stream.hash[:Subtype] == :Image

        file_path = save_image(stream, page_number, name, temp_dir)
        extracted_files << file_path if file_path
      end

      extracted_files
    rescue StandardError
      # Keep whatever was already written for this page; other pages continue.
      extracted_files || []
    end

    # Save a PDF image stream to disk
    # Supports JPEG (DCTDecode) and raw/losslessly compressed formats
    # @param stream [PDF::Reader::Stream] The image stream
    # @param page_number [Integer] Page number
    # @param name [Symbol] Image name from XObject
    # @param temp_dir [String] Directory to save the image
    # @return [String, nil] File path if successful, nil otherwise
    def save_image(stream, page_number, name, temp_dir)
      # /Filter may be a single name or an array of names; normalise to an array.
      filters = Array(stream.hash[:Filter])

      if filters == [:DCTDecode]
        # The stream payload is a complete JPEG file and can be saved directly.
        save_jpeg(stream, page_number, name, temp_dir)
      elsif (filters - RAW_PIXEL_FILTERS).empty?
        # Unfiltered, or only lossless compression - decode and wrap as TIFF.
        save_as_tiff(stream, page_number, name, temp_dir)
      end
      # Any other filter combination is unsupported and yields nil
    rescue StandardError
      # If saving fails, skip this image
      nil
    end

    # Save JPEG image directly from PDF stream
    # @param stream [PDF::Reader::Stream] The image stream
    # @param page_number [Integer] Page number
    # @param name [Symbol] Image name
    # @param temp_dir [String] Directory to save the image
    # @return [String] File path
    def save_jpeg(stream, page_number, name, temp_dir)
      filename = image_path(temp_dir, page_number, name, "jpg")
      File.binwrite(filename, stream.data)
      filename
    end

    # Save raw image data as TIFF
    # @param stream [PDF::Reader::Stream] The image stream
    # @param page_number [Integer] Page number
    # @param name [Symbol] Image name
    # @param temp_dir [String] Directory to save the image
    # @return [String, nil] File path if successful, nil for unsupported color spaces
    def save_as_tiff(stream, page_number, name, temp_dir)
      case stream.hash[:ColorSpace]
      when :DeviceRGB
        save_rgb_tiff(stream, page_number, name, temp_dir)
      when :DeviceGray
        save_gray_tiff(stream, page_number, name, temp_dir)
      end
      # Unsupported color spaces return nil
    rescue StandardError
      # If conversion fails, skip this image
      nil
    end

    # Save RGB image as TIFF
    # @param stream [PDF::Reader::Stream] The image stream
    # @param page_number [Integer] Page number
    # @param name [Symbol] Image name
    # @param temp_dir [String] Directory to save the image
    # @return [String] File path
    def save_rgb_tiff(stream, page_number, name, temp_dir)
      write_tiff(stream, image_path(temp_dir, page_number, name, "tif"), :build_rgb_header)
    end

    # Save grayscale image as TIFF
    # @param stream [PDF::Reader::Stream] The image stream
    # @param page_number [Integer] Page number
    # @param name [Symbol] Image name
    # @param temp_dir [String] Directory to save the image
    # @return [String] File path
    def save_gray_tiff(stream, page_number, name, temp_dir)
      write_tiff(stream, image_path(temp_dir, page_number, name, "tif"), :build_gray_header)
    end

    private

    # Build the output path for one image.
    # Characters outside [A-Za-z0-9_.-] in the XObject name are replaced with "_"
    # so the name can never introduce extra path components.
    # @return [String] Path inside temp_dir
    def image_path(temp_dir, page_number, name, extension)
      safe_name = name.to_s.gsub(/[^\w.-]/, "_")
      File.join(temp_dir, "page-#{page_number}-#{safe_name}.#{extension}")
    end

    # Write a TIFF consisting of a generated header followed by the decoded pixel data.
    # @param stream [PDF::Reader::Stream] The image stream
    # @param filename [String] Destination path
    # @param header_builder [Symbol] TiffBuilder method that produces the header
    # @return [String] File path
    def write_tiff(stream, filename, header_builder)
      width = stream.hash[:Width]
      height = stream.hash[:Height]
      bpc = stream.hash[:BitsPerComponent] || 8

      tiff = ImageFormats::TiffBuilder.public_send(header_builder, width, height, bpc)
      # .b forces a binary view so appending can never hit an encoding mismatch
      tiff << stream.unfiltered_data.b

      File.binwrite(filename, tiff)
      filename
    end

    # Delete a temporary directory, ignoring cleanup failures.
    # @param dir [String, nil] Directory to remove (nil when creation itself failed)
    # @return [void]
    def remove_temp_dir(dir)
      FileUtils.remove_entry(dir) if dir && File.directory?(dir)
    rescue StandardError
      nil
    end
  end
end
|
161
|
+
end
|
162
|
+
end
|
163
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module SwarmSDK
|
4
|
+
module Tools
|
5
|
+
module ImageFormats
|
6
|
+
# Builds TIFF image files from raw pixel data
|
7
|
+
# Supports RGB and grayscale color spaces
|
8
|
+
class TiffBuilder
  # TIFF field types used by the generated IFD entries.
  TYPE_SHORT = 3
  TYPE_LONG = 4

  # Byte order mark (2) + magic 42 (2) + IFD offset (4) + IFD entry count (2).
  HEADER_SIZE = 10
  # Every IFD entry is tag (2) + type (2) + count (4) + value/offset (4).
  ENTRY_SIZE = 12
  # Number of IFD entries written by both builders.
  TAG_COUNT = 8
  # Largest value that fits a SHORT field.
  MAX_SHORT = 0xFFFF

  class << self
    # Build TIFF header for RGB images
    #
    # The result is a little-endian, uncompressed, single-strip TIFF header;
    # the caller appends the raw pixel data directly after it.
    #
    # @param width [Integer] Image width in pixels
    # @param height [Integer] Image height in pixels
    # @param bpc [Integer] Bits per component (typically 8)
    # @return [String] Binary TIFF header
    def build_rgb_header(width, height, bpc)
      build_header(width, height, bpc, samples: 3, photometric: 2)
    end

    # Build TIFF header for grayscale images
    #
    # Same layout as the RGB header, with one sample per pixel and
    # PhotometricInterpretation set to 1 (MinIsBlack).
    #
    # @param width [Integer] Image width in pixels
    # @param height [Integer] Image height in pixels
    # @param bpc [Integer] Bits per component (typically 8)
    # @return [String] Binary TIFF header
    def build_gray_header(width, height, bpc)
      build_header(width, height, bpc, samples: 1, photometric: 1)
    end

    private

    # Assemble header + IFD (+ out-of-line BitsPerSample values) for a single-strip image.
    #
    # All multi-byte values are packed with explicit little-endian directives
    # ("v"/"V") so the bytes always agree with the "II" byte order mark,
    # whatever the host CPU is.
    #
    # @param samples [Integer] Samples per pixel (3 for RGB, 1 for gray)
    # @param photometric [Integer] PhotometricInterpretation value
    # @return [String] Binary TIFF header
    def build_header(width, height, bpc, samples:, photometric:)
      # First byte after the IFD entries and the 4-byte next-IFD pointer.
      ifd_end = HEADER_SIZE + (TAG_COUNT * ENTRY_SIZE) + 4

      if samples > 1
        # One value per sample does not fit the 4-byte value field, so the
        # entry stores an offset and the values follow the IFD.
        bits_values = ([bpc] * samples).pack("V*")
        bits_entry = ifd_entry(258, TYPE_LONG, samples, ifd_end)
      else
        bits_values = "".b
        bits_entry = ifd_entry(258, TYPE_SHORT, 1, bpc)
      end

      # Rows are padded to whole bytes, which matters for 1/2/4-bit samples.
      bytes_per_row = ((width * samples * bpc) + 7) / 8

      tiff = ["II", 42, 8, TAG_COUNT].pack("a2vVv") # Little-endian TIFF, IFD at offset 8
      tiff << dimension_entry(256, width) # ImageWidth
      tiff << dimension_entry(257, height) # ImageHeight
      tiff << bits_entry # BitsPerSample
      tiff << ifd_entry(259, TYPE_SHORT, 1, 1) # Compression (none)
      tiff << ifd_entry(262, TYPE_SHORT, 1, photometric) # PhotometricInterpretation
      tiff << ifd_entry(273, TYPE_LONG, 1, ifd_end + bits_values.bytesize) # StripOffsets
      tiff << ifd_entry(277, TYPE_SHORT, 1, samples) # SamplesPerPixel
      tiff << ifd_entry(279, TYPE_LONG, 1, bytes_per_row * height) # StripByteCounts
      tiff << [0].pack("V") # Next IFD pointer
      tiff << bits_values
      tiff
    end

    # Pack one 12-byte IFD entry.
    # @return [String] Binary entry
    def ifd_entry(tag, type, count, value)
      [tag, type, count, value].pack("vvVV")
    end

    # ImageWidth/ImageHeight entry: SHORT when the value fits, LONG otherwise.
    # @return [String] Binary entry
    def dimension_entry(tag, pixels)
      type = pixels > MAX_SHORT ? TYPE_LONG : TYPE_SHORT
      ifd_entry(tag, type, 1, pixels)
    end
  end
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|