swarm_sdk 2.7.14 → 3.0.0.alpha2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185)
  1. checksums.yaml +4 -4
  2. data/lib/swarm_sdk/ruby_llm_patches/chat_callbacks_patch.rb +16 -0
  3. data/lib/swarm_sdk/ruby_llm_patches/init.rb +4 -1
  4. data/lib/swarm_sdk/v3/agent.rb +1165 -0
  5. data/lib/swarm_sdk/v3/agent_builder.rb +533 -0
  6. data/lib/swarm_sdk/v3/agent_definition.rb +330 -0
  7. data/lib/swarm_sdk/v3/configuration.rb +490 -0
  8. data/lib/swarm_sdk/v3/debug_log.rb +86 -0
  9. data/lib/swarm_sdk/v3/event_stream.rb +130 -0
  10. data/lib/swarm_sdk/v3/hooks/context.rb +112 -0
  11. data/lib/swarm_sdk/v3/hooks/result.rb +115 -0
  12. data/lib/swarm_sdk/v3/hooks/runner.rb +128 -0
  13. data/lib/swarm_sdk/v3/mcp/connector.rb +183 -0
  14. data/lib/swarm_sdk/v3/mcp/mcp_error.rb +15 -0
  15. data/lib/swarm_sdk/v3/mcp/server_definition.rb +125 -0
  16. data/lib/swarm_sdk/v3/mcp/ssl_http_transport.rb +103 -0
  17. data/lib/swarm_sdk/v3/mcp/stdio_transport.rb +135 -0
  18. data/lib/swarm_sdk/v3/mcp/tool_proxy.rb +53 -0
  19. data/lib/swarm_sdk/v3/memory/adapters/base.rb +297 -0
  20. data/lib/swarm_sdk/v3/memory/adapters/faiss_support.rb +194 -0
  21. data/lib/swarm_sdk/v3/memory/adapters/filesystem_adapter.rb +212 -0
  22. data/lib/swarm_sdk/v3/memory/adapters/sqlite_adapter.rb +507 -0
  23. data/lib/swarm_sdk/v3/memory/adapters/vector_utils.rb +88 -0
  24. data/lib/swarm_sdk/v3/memory/card.rb +206 -0
  25. data/lib/swarm_sdk/v3/memory/cluster.rb +146 -0
  26. data/lib/swarm_sdk/v3/memory/compressor.rb +496 -0
  27. data/lib/swarm_sdk/v3/memory/consolidator.rb +427 -0
  28. data/lib/swarm_sdk/v3/memory/context_builder.rb +339 -0
  29. data/lib/swarm_sdk/v3/memory/edge.rb +105 -0
  30. data/lib/swarm_sdk/v3/memory/embedder.rb +185 -0
  31. data/lib/swarm_sdk/v3/memory/exposure_tracker.rb +104 -0
  32. data/lib/swarm_sdk/v3/memory/ingestion_pipeline.rb +394 -0
  33. data/lib/swarm_sdk/v3/memory/retriever.rb +289 -0
  34. data/lib/swarm_sdk/v3/memory/store.rb +489 -0
  35. data/lib/swarm_sdk/v3/skills/loader.rb +147 -0
  36. data/lib/swarm_sdk/v3/skills/manifest.rb +45 -0
  37. data/lib/swarm_sdk/v3/sub_task_agent.rb +248 -0
  38. data/lib/swarm_sdk/v3/tools/base.rb +80 -0
  39. data/lib/swarm_sdk/v3/tools/bash.rb +174 -0
  40. data/lib/swarm_sdk/v3/tools/clock.rb +32 -0
  41. data/lib/swarm_sdk/v3/tools/document_converters/base.rb +84 -0
  42. data/lib/swarm_sdk/v3/tools/document_converters/docx_converter.rb +120 -0
  43. data/lib/swarm_sdk/v3/tools/document_converters/pdf_converter.rb +111 -0
  44. data/lib/swarm_sdk/v3/tools/document_converters/xlsx_converter.rb +128 -0
  45. data/lib/swarm_sdk/v3/tools/edit.rb +111 -0
  46. data/lib/swarm_sdk/v3/tools/glob.rb +96 -0
  47. data/lib/swarm_sdk/v3/tools/grep.rb +200 -0
  48. data/lib/swarm_sdk/v3/tools/message_teammate.rb +15 -0
  49. data/lib/swarm_sdk/v3/tools/message_user.rb +15 -0
  50. data/lib/swarm_sdk/v3/tools/read.rb +213 -0
  51. data/lib/swarm_sdk/v3/tools/read_tracker.rb +40 -0
  52. data/lib/swarm_sdk/v3/tools/registry.rb +208 -0
  53. data/lib/swarm_sdk/v3/tools/sub_task.rb +183 -0
  54. data/lib/swarm_sdk/v3/tools/think.rb +88 -0
  55. data/lib/swarm_sdk/v3/tools/write.rb +87 -0
  56. data/lib/swarm_sdk/v3.rb +145 -0
  57. metadata +88 -149
  58. data/lib/swarm_sdk/agent/RETRY_LOGIC.md +0 -175
  59. data/lib/swarm_sdk/agent/builder.rb +0 -705
  60. data/lib/swarm_sdk/agent/chat.rb +0 -1438
  61. data/lib/swarm_sdk/agent/chat_helpers/context_tracker.rb +0 -375
  62. data/lib/swarm_sdk/agent/chat_helpers/event_emitter.rb +0 -204
  63. data/lib/swarm_sdk/agent/chat_helpers/hook_integration.rb +0 -480
  64. data/lib/swarm_sdk/agent/chat_helpers/instrumentation.rb +0 -85
  65. data/lib/swarm_sdk/agent/chat_helpers/llm_configuration.rb +0 -290
  66. data/lib/swarm_sdk/agent/chat_helpers/logging_helpers.rb +0 -116
  67. data/lib/swarm_sdk/agent/chat_helpers/serialization.rb +0 -83
  68. data/lib/swarm_sdk/agent/chat_helpers/system_reminder_injector.rb +0 -134
  69. data/lib/swarm_sdk/agent/chat_helpers/system_reminders.rb +0 -79
  70. data/lib/swarm_sdk/agent/chat_helpers/token_tracking.rb +0 -146
  71. data/lib/swarm_sdk/agent/context.rb +0 -115
  72. data/lib/swarm_sdk/agent/context_manager.rb +0 -315
  73. data/lib/swarm_sdk/agent/definition.rb +0 -588
  74. data/lib/swarm_sdk/agent/llm_instrumentation_middleware.rb +0 -226
  75. data/lib/swarm_sdk/agent/system_prompt_builder.rb +0 -173
  76. data/lib/swarm_sdk/agent/tool_registry.rb +0 -189
  77. data/lib/swarm_sdk/agent_registry.rb +0 -146
  78. data/lib/swarm_sdk/builders/base_builder.rb +0 -558
  79. data/lib/swarm_sdk/claude_code_agent_adapter.rb +0 -205
  80. data/lib/swarm_sdk/concerns/cleanupable.rb +0 -42
  81. data/lib/swarm_sdk/concerns/snapshotable.rb +0 -67
  82. data/lib/swarm_sdk/concerns/validatable.rb +0 -55
  83. data/lib/swarm_sdk/config.rb +0 -368
  84. data/lib/swarm_sdk/configuration/parser.rb +0 -397
  85. data/lib/swarm_sdk/configuration/translator.rb +0 -285
  86. data/lib/swarm_sdk/configuration.rb +0 -165
  87. data/lib/swarm_sdk/context_compactor/metrics.rb +0 -147
  88. data/lib/swarm_sdk/context_compactor/token_counter.rb +0 -102
  89. data/lib/swarm_sdk/context_compactor.rb +0 -335
  90. data/lib/swarm_sdk/context_management/builder.rb +0 -128
  91. data/lib/swarm_sdk/context_management/context.rb +0 -328
  92. data/lib/swarm_sdk/custom_tool_registry.rb +0 -226
  93. data/lib/swarm_sdk/defaults.rb +0 -251
  94. data/lib/swarm_sdk/events_to_messages.rb +0 -199
  95. data/lib/swarm_sdk/hooks/adapter.rb +0 -359
  96. data/lib/swarm_sdk/hooks/context.rb +0 -197
  97. data/lib/swarm_sdk/hooks/definition.rb +0 -80
  98. data/lib/swarm_sdk/hooks/error.rb +0 -29
  99. data/lib/swarm_sdk/hooks/executor.rb +0 -146
  100. data/lib/swarm_sdk/hooks/registry.rb +0 -147
  101. data/lib/swarm_sdk/hooks/result.rb +0 -150
  102. data/lib/swarm_sdk/hooks/shell_executor.rb +0 -256
  103. data/lib/swarm_sdk/hooks/tool_call.rb +0 -35
  104. data/lib/swarm_sdk/hooks/tool_result.rb +0 -62
  105. data/lib/swarm_sdk/log_collector.rb +0 -227
  106. data/lib/swarm_sdk/log_stream.rb +0 -127
  107. data/lib/swarm_sdk/markdown_parser.rb +0 -75
  108. data/lib/swarm_sdk/model_aliases.json +0 -8
  109. data/lib/swarm_sdk/models.json +0 -44002
  110. data/lib/swarm_sdk/models.rb +0 -161
  111. data/lib/swarm_sdk/node_context.rb +0 -245
  112. data/lib/swarm_sdk/observer/builder.rb +0 -81
  113. data/lib/swarm_sdk/observer/config.rb +0 -45
  114. data/lib/swarm_sdk/observer/manager.rb +0 -248
  115. data/lib/swarm_sdk/patterns/agent_observer.rb +0 -160
  116. data/lib/swarm_sdk/permissions/config.rb +0 -239
  117. data/lib/swarm_sdk/permissions/error_formatter.rb +0 -121
  118. data/lib/swarm_sdk/permissions/path_matcher.rb +0 -35
  119. data/lib/swarm_sdk/permissions/validator.rb +0 -173
  120. data/lib/swarm_sdk/permissions_builder.rb +0 -122
  121. data/lib/swarm_sdk/plugin.rb +0 -309
  122. data/lib/swarm_sdk/plugin_registry.rb +0 -101
  123. data/lib/swarm_sdk/proc_helpers.rb +0 -53
  124. data/lib/swarm_sdk/prompts/base_system_prompt.md.erb +0 -119
  125. data/lib/swarm_sdk/restore_result.rb +0 -65
  126. data/lib/swarm_sdk/result.rb +0 -241
  127. data/lib/swarm_sdk/snapshot.rb +0 -156
  128. data/lib/swarm_sdk/snapshot_from_events.rb +0 -397
  129. data/lib/swarm_sdk/state_restorer.rb +0 -476
  130. data/lib/swarm_sdk/state_snapshot.rb +0 -334
  131. data/lib/swarm_sdk/swarm/agent_initializer.rb +0 -648
  132. data/lib/swarm_sdk/swarm/all_agents_builder.rb +0 -204
  133. data/lib/swarm_sdk/swarm/builder.rb +0 -256
  134. data/lib/swarm_sdk/swarm/executor.rb +0 -446
  135. data/lib/swarm_sdk/swarm/hook_triggers.rb +0 -162
  136. data/lib/swarm_sdk/swarm/lazy_delegate_chat.rb +0 -372
  137. data/lib/swarm_sdk/swarm/logging_callbacks.rb +0 -361
  138. data/lib/swarm_sdk/swarm/mcp_configurator.rb +0 -290
  139. data/lib/swarm_sdk/swarm/swarm_registry_builder.rb +0 -67
  140. data/lib/swarm_sdk/swarm/tool_configurator.rb +0 -392
  141. data/lib/swarm_sdk/swarm.rb +0 -973
  142. data/lib/swarm_sdk/swarm_loader.rb +0 -145
  143. data/lib/swarm_sdk/swarm_registry.rb +0 -136
  144. data/lib/swarm_sdk/tools/base.rb +0 -63
  145. data/lib/swarm_sdk/tools/bash.rb +0 -280
  146. data/lib/swarm_sdk/tools/clock.rb +0 -46
  147. data/lib/swarm_sdk/tools/delegate.rb +0 -389
  148. data/lib/swarm_sdk/tools/document_converters/base_converter.rb +0 -83
  149. data/lib/swarm_sdk/tools/document_converters/docx_converter.rb +0 -99
  150. data/lib/swarm_sdk/tools/document_converters/html_converter.rb +0 -101
  151. data/lib/swarm_sdk/tools/document_converters/pdf_converter.rb +0 -78
  152. data/lib/swarm_sdk/tools/document_converters/xlsx_converter.rb +0 -194
  153. data/lib/swarm_sdk/tools/edit.rb +0 -145
  154. data/lib/swarm_sdk/tools/glob.rb +0 -166
  155. data/lib/swarm_sdk/tools/grep.rb +0 -235
  156. data/lib/swarm_sdk/tools/image_extractors/docx_image_extractor.rb +0 -43
  157. data/lib/swarm_sdk/tools/image_extractors/pdf_image_extractor.rb +0 -167
  158. data/lib/swarm_sdk/tools/image_formats/tiff_builder.rb +0 -65
  159. data/lib/swarm_sdk/tools/mcp_tool_stub.rb +0 -198
  160. data/lib/swarm_sdk/tools/multi_edit.rb +0 -236
  161. data/lib/swarm_sdk/tools/path_resolver.rb +0 -92
  162. data/lib/swarm_sdk/tools/read.rb +0 -261
  163. data/lib/swarm_sdk/tools/registry.rb +0 -205
  164. data/lib/swarm_sdk/tools/scratchpad/scratchpad_list.rb +0 -117
  165. data/lib/swarm_sdk/tools/scratchpad/scratchpad_read.rb +0 -97
  166. data/lib/swarm_sdk/tools/scratchpad/scratchpad_write.rb +0 -108
  167. data/lib/swarm_sdk/tools/stores/read_tracker.rb +0 -96
  168. data/lib/swarm_sdk/tools/stores/scratchpad_storage.rb +0 -273
  169. data/lib/swarm_sdk/tools/stores/storage.rb +0 -142
  170. data/lib/swarm_sdk/tools/stores/todo_manager.rb +0 -65
  171. data/lib/swarm_sdk/tools/think.rb +0 -100
  172. data/lib/swarm_sdk/tools/todo_write.rb +0 -237
  173. data/lib/swarm_sdk/tools/web_fetch.rb +0 -264
  174. data/lib/swarm_sdk/tools/write.rb +0 -112
  175. data/lib/swarm_sdk/transcript_builder.rb +0 -278
  176. data/lib/swarm_sdk/utils.rb +0 -68
  177. data/lib/swarm_sdk/validation_result.rb +0 -33
  178. data/lib/swarm_sdk/version.rb +0 -5
  179. data/lib/swarm_sdk/workflow/agent_config.rb +0 -95
  180. data/lib/swarm_sdk/workflow/builder.rb +0 -227
  181. data/lib/swarm_sdk/workflow/executor.rb +0 -497
  182. data/lib/swarm_sdk/workflow/node_builder.rb +0 -593
  183. data/lib/swarm_sdk/workflow/transformer_executor.rb +0 -250
  184. data/lib/swarm_sdk/workflow.rb +0 -589
  185. data/lib/swarm_sdk.rb +0 -721
@@ -1,235 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module SwarmSDK
4
- module Tools
5
- # Grep tool for searching file contents using ripgrep-style patterns
6
- #
7
- # Powerful search capabilities with regex support, context lines, and filtering.
8
- # Built on ripgrep (rg) for fast, efficient searching.
9
- class Grep < Base
10
- include PathResolver
11
-
12
- # Factory pattern: declare what parameters this tool needs for instantiation
13
- class << self
14
- def creation_requirements
15
- [:directory]
16
- end
17
- end
18
-
19
- def initialize(directory:)
20
- super()
21
- @directory = File.expand_path(directory)
22
- end
23
-
24
- define_method(:name) { "Grep" }
25
-
26
- description <<~DESC
27
- A powerful search tool built on ripgrep
28
-
29
- Usage:
30
- - ALWAYS use Grep for search tasks. NEVER invoke `grep` or `rg` as a Bash command. The Grep tool has been optimized for correct permissions and access.
31
- - Supports full regex syntax (e.g., "log.*Error", "function\\s+\\w+")
32
- - Filter files with glob parameter (e.g., "*.js", "**/*.tsx") or type parameter (e.g., "js", "py", "rust")
33
- - Output modes: "content" shows matching lines, "files_with_matches" shows only file paths (default), "count" shows match counts
34
- - Use Task tool for open-ended searches requiring multiple rounds
35
- - Pattern syntax: Uses ripgrep (not grep) - literal braces need escaping (use `interface\\{\\}` to find `interface{}` in Go code)
36
- - Multiline matching: By default patterns match within single lines only. For cross-line patterns like `struct \\{[\\s\\S]*?field`, use `multiline: true`
37
- DESC
38
-
39
- param :pattern,
40
- type: "string",
41
- desc: "The regular expression pattern to search for in file contents",
42
- required: true
43
-
44
- param :path,
45
- type: "string",
46
- desc: "File or directory to search in (rg PATH). Defaults to current working directory.",
47
- required: false
48
-
49
- param :glob,
50
- type: "string",
51
- desc: "Glob pattern to filter files (e.g. \"*.js\", \"*.{ts,tsx}\") - maps to rg --glob",
52
- required: false
53
-
54
- param :type,
55
- type: "string",
56
- desc: "File type to search (rg --type). Common types: c, cpp, cs, csharp, css, dart, docker, dockercompose, elixir, erlang, go, graphql, haskell, html, java, js, json, kotlin, lua, make, markdown, md, php, py, python, ruby, rust, sass, scala, sh, sql, svelte, swift, tf, toml, ts, typescript, vim, vue, xml, yaml, zig",
57
- required: false
58
-
59
- param :output_mode,
60
- type: "string",
61
- desc: "Output mode: \"content\" shows matching lines (supports context/line number options), \"files_with_matches\" shows file paths (default), \"count\" shows match counts. All modes support head_limit.",
62
- required: false
63
-
64
- param :case_insensitive,
65
- type: "boolean",
66
- desc: "Case insensitive search (rg -i)",
67
- required: false
68
-
69
- param :multiline,
70
- type: "boolean",
71
- desc: "Enable multiline mode where . matches newlines and patterns can span lines (rg -U --multiline-dotall)",
72
- required: false
73
-
74
- param :context_before,
75
- type: "integer",
76
- desc: "Number of lines to show before each match (rg -B). Requires output_mode: \"content\", ignored otherwise.",
77
- required: false
78
-
79
- param :context_after,
80
- type: "integer",
81
- desc: "Number of lines to show after each match (rg -A). Requires output_mode: \"content\", ignored otherwise.",
82
- required: false
83
-
84
- param :context,
85
- type: "integer",
86
- desc: "Number of lines to show before and after each match (rg -C). Requires output_mode: \"content\", ignored otherwise.",
87
- required: false
88
-
89
- param :show_line_numbers,
90
- type: "boolean",
91
- desc: "Show line numbers in output (rg -n). Requires output_mode: \"content\", ignored otherwise.",
92
- required: false
93
-
94
- param :head_limit,
95
- type: "integer",
96
- desc: "Limit output to first N lines/entries, equivalent to \"| head -N\". Works across all output modes: content (limits output lines), files_with_matches (limits file paths), count (limits count entries). When unspecified, shows all results from ripgrep.",
97
- required: false
98
-
99
- def execute(
100
- pattern:,
101
- path: nil,
102
- glob: nil,
103
- type: nil,
104
- output_mode: "files_with_matches",
105
- case_insensitive: false,
106
- multiline: false,
107
- context_before: nil,
108
- context_after: nil,
109
- context: nil,
110
- show_line_numbers: false,
111
- head_limit: nil
112
- )
113
- # Validate inputs
114
- return validation_error("pattern is required") if pattern.nil? || pattern.empty?
115
-
116
- # CRITICAL: Default path to agent's directory (NOT current directory)
117
- path = if path.nil? || path.to_s.strip.empty?
118
- @directory
119
- else
120
- # Resolve relative paths against agent directory
121
- resolve_path(path)
122
- end
123
-
124
- # Validate output_mode
125
- valid_modes = ["content", "files_with_matches", "count"]
126
- unless valid_modes.include?(output_mode)
127
- return validation_error("output_mode must be one of: #{valid_modes.join(", ")}")
128
- end
129
-
130
- # Build ripgrep command
131
- cmd = ["rg"]
132
-
133
- # Output mode flags
134
- case output_mode
135
- when "files_with_matches"
136
- cmd << "-l" # List files with matches
137
- when "count"
138
- cmd << "-c" # Count matches per file
139
- when "content"
140
- # Default mode, no special flag needed
141
- # Add line numbers if requested
142
- cmd << "-n" if show_line_numbers
143
-
144
- # Add context flags
145
- cmd << "-B" << context_before.to_s if context_before
146
- cmd << "-A" << context_after.to_s if context_after
147
- cmd << "-C" << context.to_s if context
148
- end
149
-
150
- # Case sensitivity
151
- cmd << "-i" if case_insensitive
152
-
153
- # Multiline mode
154
- if multiline
155
- cmd << "-U" << "--multiline-dotall"
156
- end
157
-
158
- # File filtering (only add if non-empty)
159
- cmd << "--type" << type if type && !type.to_s.strip.empty?
160
- cmd << "--glob" << glob if glob && !glob.to_s.strip.empty?
161
-
162
- # Pattern
163
- cmd << "-e" << pattern
164
-
165
- # Path
166
- cmd << path
167
-
168
- # Execute command
169
- begin
170
- require "open3"
171
-
172
- stdout, stderr, status = Open3.capture3(*cmd)
173
-
174
- # Handle no matches (exit code 1 for ripgrep means no matches found)
175
- if status.exitstatus == 1 && stderr.empty?
176
- return "No matches found for pattern: #{pattern}"
177
- end
178
-
179
- # Handle errors (exit code 2 means error)
180
- if status.exitstatus == 2 || !stderr.empty?
181
- return error("ripgrep error: #{stderr}")
182
- end
183
-
184
- # Success - format output
185
- output = stdout
186
-
187
- # Apply head_limit if specified
188
- if head_limit && head_limit > 0
189
- lines = output.lines
190
- if lines.count > head_limit
191
- output = lines.take(head_limit).join
192
- output += "\n\n<system-reminder>Output limited to first #{head_limit} lines. Total results: #{lines.count} lines.</system-reminder>"
193
- end
194
- end
195
-
196
- # Add reminder about usage
197
- reminder = build_usage_reminder(output_mode, pattern)
198
- output = "#{output}\n\n#{reminder}" unless reminder.empty?
199
-
200
- output.empty? ? "No matches found for pattern: #{pattern}" : output
201
- rescue Errno::ENOENT
202
- error("ripgrep (rg) is not installed or not in PATH. Please install ripgrep to use the Grep tool.")
203
- rescue Errno::EACCES
204
- error("Permission denied: Cannot search in '#{path}'")
205
- rescue StandardError => e
206
- error("Failed to execute search: #{e.class.name} - #{e.message}")
207
- end
208
- rescue StandardError => e
209
- error("Unexpected error during search: #{e.class.name} - #{e.message}")
210
- end
211
-
212
- private
213
-
214
- def validation_error(message)
215
- "<tool_use_error>InputValidationError: #{message}</tool_use_error>"
216
- end
217
-
218
- def error(message)
219
- "Error: #{message}"
220
- end
221
-
222
- def build_usage_reminder(output_mode, pattern)
223
- return "" if output_mode == "content"
224
-
225
- <<~REMINDER
226
- <system-reminder>
227
- You used output_mode: '#{output_mode}' which only shows #{output_mode == "files_with_matches" ? "file paths" : "match counts"}.
228
- To see the actual matching lines and their content, use output_mode: 'content'.
229
- You can also add show_line_numbers: true and context lines (context_before, context_after, or context) for better context.
230
- </system-reminder>
231
- REMINDER
232
- end
233
- end
234
- end
235
- end
@@ -1,43 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module SwarmSDK
4
- module Tools
5
- module ImageExtractors
6
- # Extracts images from DOCX documents
7
- # DOCX files are ZIP archives with images stored in word/media/
8
- class DocxImageExtractor
9
- class << self
10
- # Extract all images from a DOCX document
11
- # @param doc [Docx::Document] The DOCX document instance
12
- # @param docx_path [String] Path to the DOCX file
13
- # @return [Array<String>] Array of temporary file paths containing extracted images
14
- def extract_images(doc, docx_path)
15
- image_paths = []
16
- temp_dir = Dir.mktmpdir("docx_images_#{File.basename(docx_path, ".*")}")
17
-
18
- # DOCX files are ZIP archives with images in word/media/
19
- doc.zip.glob("word/media/*").each do |entry|
20
- next unless entry.file?
21
-
22
- # Check if it's an image by extension
23
- next unless entry.name.match?(/\.(png|jpe?g|gif|bmp|tiff?)$/i)
24
-
25
- output_path = File.join(temp_dir, File.basename(entry.name))
26
-
27
- File.open(output_path, "wb") do |f|
28
- f.write(doc.zip.read(entry.name))
29
- end
30
-
31
- image_paths << output_path
32
- end
33
-
34
- image_paths
35
- rescue StandardError
36
- # If image extraction fails, don't fail the entire document read
37
- []
38
- end
39
- end
40
- end
41
- end
42
- end
43
- end
@@ -1,167 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module SwarmSDK
4
- module Tools
5
- module ImageExtractors
6
- # Extracts images from PDF documents
7
- # Only extracts JPEG images (DCTDecode format) which are LLM API compatible
8
- # Non-JPEG images (FlateDecode, LZWDecode) are skipped because they would
9
- # require TIFF format which is not supported by LLM APIs
10
- # Supported LLM image formats: ['png', 'jpeg', 'gif', 'webp']
11
- class PdfImageExtractor
12
- class << self
13
- # Extract all images from a PDF document
14
- # @param reader [PDF::Reader] The PDF reader instance
15
- # @param pdf_path [String] Path to the PDF file
16
- # @return [Array<String>] Array of temporary file paths containing extracted images
17
- def extract_images(reader, pdf_path)
18
- image_paths = []
19
- temp_dir = Dir.mktmpdir("pdf_images_#{File.basename(pdf_path, ".*")}")
20
-
21
- reader.pages.each_with_index do |page, page_index|
22
- page_images = extract_from_page(page, page_index + 1, temp_dir)
23
- image_paths.concat(page_images)
24
- end
25
-
26
- image_paths
27
- rescue StandardError
28
- # If image extraction fails, log it but don't fail the entire PDF read
29
- []
30
- end
31
-
32
- # Extract images from a single PDF page
33
- # @param page [PDF::Reader::Page] The PDF page
34
- # @param page_number [Integer] Page number (1-indexed)
35
- # @param temp_dir [String] Directory to save extracted images
36
- # @return [Array<String>] Array of file paths for extracted images
37
- def extract_from_page(page, page_number, temp_dir)
38
- extracted_files = []
39
-
40
- # Get XObjects (external objects) from the page
41
- xobjects = page.xobjects
42
- return extracted_files if xobjects.empty?
43
-
44
- xobjects.each do |name, stream|
45
- # Only process Image XObjects (not Form XObjects)
46
- next unless stream.hash[:Subtype] == :Image
47
-
48
- file_path = save_image(stream, page_number, name, temp_dir)
49
- extracted_files << file_path if file_path
50
- end
51
-
52
- extracted_files
53
- rescue StandardError
54
- # If extraction fails for this page, continue with others
55
- []
56
- end
57
-
58
- # Save a PDF image stream to disk
59
- # Supports JPEG (DCTDecode) and raw formats
60
- # @param stream [PDF::Reader::Stream] The image stream
61
- # @param page_number [Integer] Page number
62
- # @param name [Symbol] Image name from XObject
63
- # @param temp_dir [String] Directory to save the image
64
- # @return [String, nil] File path if successful, nil otherwise
65
- def save_image(stream, page_number, name, temp_dir)
66
- filter = stream.hash[:Filter]
67
-
68
- case filter
69
- when :DCTDecode
70
- # JPEG images can be saved directly - LLM API compatible
71
- save_jpeg(stream, page_number, name, temp_dir)
72
- when :FlateDecode, :LZWDecode, nil
73
- # Skip non-JPEG images to avoid TIFF format (not supported by LLM APIs)
74
- # LLM APIs only support: ['png', 'jpeg', 'gif', 'webp']
75
- # These images would require TIFF conversion which causes API errors
76
- nil
77
- end
78
- # Unsupported formats return nil
79
- rescue StandardError
80
- # If saving fails, skip this image
81
- nil
82
- end
83
-
84
- # Save JPEG image directly from PDF stream
85
- # @param stream [PDF::Reader::Stream] The image stream
86
- # @param page_number [Integer] Page number
87
- # @param name [Symbol] Image name
88
- # @param temp_dir [String] Directory to save the image
89
- # @return [String] File path
90
- def save_jpeg(stream, page_number, name, temp_dir)
91
- filename = File.join(temp_dir, "page-#{page_number}-#{name}.jpg")
92
-
93
- # JPEG images can be written directly - the stream.data contains a complete JPEG file
94
- File.open(filename, "wb") do |file|
95
- file.write(stream.data)
96
- end
97
-
98
- filename
99
- end
100
-
101
- # Save raw image data as TIFF
102
- # @param stream [PDF::Reader::Stream] The image stream
103
- # @param page_number [Integer] Page number
104
- # @param name [Symbol] Image name
105
- # @param temp_dir [String] Directory to save the image
106
- # @return [String, nil] File path if successful, nil for unsupported color spaces
107
- def save_as_tiff(stream, page_number, name, temp_dir)
108
- color_space = stream.hash[:ColorSpace]
109
-
110
- case color_space
111
- when :DeviceRGB
112
- save_rgb_tiff(stream, page_number, name, temp_dir)
113
- when :DeviceGray
114
- save_gray_tiff(stream, page_number, name, temp_dir)
115
- end
116
- # Unsupported color spaces return nil
117
- rescue StandardError
118
- # If conversion fails, skip this image
119
- nil
120
- end
121
-
122
- # Save RGB image as TIFF
123
- # @param stream [PDF::Reader::Stream] The image stream
124
- # @param page_number [Integer] Page number
125
- # @param name [Symbol] Image name
126
- # @param temp_dir [String] Directory to save the image
127
- # @return [String] File path
128
- def save_rgb_tiff(stream, page_number, name, temp_dir)
129
- filename = File.join(temp_dir, "page-#{page_number}-#{name}.tif")
130
-
131
- width = stream.hash[:Width]
132
- height = stream.hash[:Height]
133
- bpc = stream.hash[:BitsPerComponent] || 8
134
-
135
- # Build TIFF header
136
- tiff = ImageFormats::TiffBuilder.build_rgb_header(width, height, bpc)
137
- tiff << stream.unfiltered_data # Get decompressed raw pixel data
138
-
139
- File.open(filename, "wb") { |file| file.write(tiff) }
140
- filename
141
- end
142
-
143
- # Save grayscale image as TIFF
144
- # @param stream [PDF::Reader::Stream] The image stream
145
- # @param page_number [Integer] Page number
146
- # @param name [Symbol] Image name
147
- # @param temp_dir [String] Directory to save the image
148
- # @return [String] File path
149
- def save_gray_tiff(stream, page_number, name, temp_dir)
150
- filename = File.join(temp_dir, "page-#{page_number}-#{name}.tif")
151
-
152
- width = stream.hash[:Width]
153
- height = stream.hash[:Height]
154
- bpc = stream.hash[:BitsPerComponent] || 8
155
-
156
- # Build TIFF header for grayscale
157
- tiff = ImageFormats::TiffBuilder.build_gray_header(width, height, bpc)
158
- tiff << stream.unfiltered_data
159
-
160
- File.open(filename, "wb") { |file| file.write(tiff) }
161
- filename
162
- end
163
- end
164
- end
165
- end
166
- end
167
- end
@@ -1,65 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module SwarmSDK
4
- module Tools
5
- module ImageFormats
6
- # Builds TIFF image files from raw pixel data
7
- # Supports RGB and grayscale color spaces
8
- class TiffBuilder
9
- class << self
10
- # Build TIFF header for RGB images
11
- # @param width [Integer] Image width in pixels
12
- # @param height [Integer] Image height in pixels
13
- # @param bpc [Integer] Bits per component (typically 8)
14
- # @return [String] Binary TIFF header
15
- def build_rgb_header(width, height, bpc)
16
- # Helper lambdas for TIFF tags
17
- long_tag = ->(tag, count, value) { [tag, 4, count, value].pack("ssII") }
18
- short_tag = ->(tag, count, value) { [tag, 3, count, value].pack("ssII") }
19
-
20
- tag_count = 8
21
- header = [73, 73, 42, 8, tag_count].pack("ccsIs") # Little-endian TIFF
22
-
23
- tiff = header.dup
24
- tiff << short_tag.call(256, 1, width) # ImageWidth
25
- tiff << short_tag.call(257, 1, height) # ImageHeight
26
- tiff << long_tag.call(258, 3, header.size + (tag_count * 12) + 4) # BitsPerSample
27
- tiff << short_tag.call(259, 1, 1) # Compression (none)
28
- tiff << short_tag.call(262, 1, 2) # PhotometricInterpretation (RGB)
29
- tiff << long_tag.call(273, 1, header.size + (tag_count * 12) + 16) # StripOffsets
30
- tiff << short_tag.call(277, 1, 3) # SamplesPerPixel
31
- tiff << long_tag.call(279, 1, width * height * 3) # StripByteCounts
32
- tiff << [0].pack("I") # Next IFD pointer
33
- tiff << [bpc, bpc, bpc].pack("III") # BitsPerSample values
34
- tiff
35
- end
36
-
37
- # Build TIFF header for grayscale images
38
- # @param width [Integer] Image width in pixels
39
- # @param height [Integer] Image height in pixels
40
- # @param bpc [Integer] Bits per component (typically 8)
41
- # @return [String] Binary TIFF header
42
- def build_gray_header(width, height, bpc)
43
- long_tag = ->(tag, count, value) { [tag, 4, count, value].pack("ssII") }
44
- short_tag = ->(tag, count, value) { [tag, 3, count, value].pack("ssII") }
45
-
46
- tag_count = 8
47
- header = [73, 73, 42, 8, tag_count].pack("ccsIs")
48
-
49
- tiff = header.dup
50
- tiff << short_tag.call(256, 1, width) # ImageWidth
51
- tiff << short_tag.call(257, 1, height) # ImageHeight
52
- tiff << short_tag.call(258, 1, bpc) # BitsPerSample
53
- tiff << short_tag.call(259, 1, 1) # Compression (none)
54
- tiff << short_tag.call(262, 1, 1) # PhotometricInterpretation (MinIsBlack)
55
- tiff << long_tag.call(273, 1, header.size + (tag_count * 12) + 4) # StripOffsets
56
- tiff << short_tag.call(277, 1, 1) # SamplesPerPixel
57
- tiff << long_tag.call(279, 1, width * height) # StripByteCounts
58
- tiff << [0].pack("I") # Next IFD pointer
59
- tiff
60
- end
61
- end
62
- end
63
- end
64
- end
65
- end