swarm_memory 2.1.5 → 2.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (182) hide show
  1. checksums.yaml +4 -4
  2. data/lib/swarm_memory/version.rb +1 -1
  3. metadata +5 -184
  4. data/lib/claude_swarm/base_executor.rb +0 -133
  5. data/lib/claude_swarm/claude_code_executor.rb +0 -349
  6. data/lib/claude_swarm/claude_mcp_server.rb +0 -78
  7. data/lib/claude_swarm/cli.rb +0 -697
  8. data/lib/claude_swarm/commands/ps.rb +0 -215
  9. data/lib/claude_swarm/commands/show.rb +0 -139
  10. data/lib/claude_swarm/configuration.rb +0 -373
  11. data/lib/claude_swarm/hooks/session_start_hook.rb +0 -42
  12. data/lib/claude_swarm/json_handler.rb +0 -91
  13. data/lib/claude_swarm/mcp_generator.rb +0 -230
  14. data/lib/claude_swarm/openai/chat_completion.rb +0 -256
  15. data/lib/claude_swarm/openai/executor.rb +0 -256
  16. data/lib/claude_swarm/openai/responses.rb +0 -319
  17. data/lib/claude_swarm/orchestrator.rb +0 -878
  18. data/lib/claude_swarm/process_tracker.rb +0 -78
  19. data/lib/claude_swarm/session_cost_calculator.rb +0 -209
  20. data/lib/claude_swarm/session_path.rb +0 -42
  21. data/lib/claude_swarm/settings_generator.rb +0 -77
  22. data/lib/claude_swarm/system_utils.rb +0 -46
  23. data/lib/claude_swarm/templates/generation_prompt.md.erb +0 -230
  24. data/lib/claude_swarm/tools/reset_session_tool.rb +0 -24
  25. data/lib/claude_swarm/tools/session_info_tool.rb +0 -24
  26. data/lib/claude_swarm/tools/task_tool.rb +0 -63
  27. data/lib/claude_swarm/version.rb +0 -5
  28. data/lib/claude_swarm/worktree_manager.rb +0 -475
  29. data/lib/claude_swarm/yaml_loader.rb +0 -22
  30. data/lib/claude_swarm.rb +0 -67
  31. data/lib/swarm_cli/cli.rb +0 -201
  32. data/lib/swarm_cli/command_registry.rb +0 -61
  33. data/lib/swarm_cli/commands/mcp_serve.rb +0 -130
  34. data/lib/swarm_cli/commands/mcp_tools.rb +0 -148
  35. data/lib/swarm_cli/commands/migrate.rb +0 -55
  36. data/lib/swarm_cli/commands/run.rb +0 -173
  37. data/lib/swarm_cli/config_loader.rb +0 -98
  38. data/lib/swarm_cli/formatters/human_formatter.rb +0 -781
  39. data/lib/swarm_cli/formatters/json_formatter.rb +0 -51
  40. data/lib/swarm_cli/interactive_repl.rb +0 -924
  41. data/lib/swarm_cli/mcp_serve_options.rb +0 -44
  42. data/lib/swarm_cli/mcp_tools_options.rb +0 -59
  43. data/lib/swarm_cli/migrate_options.rb +0 -54
  44. data/lib/swarm_cli/migrator.rb +0 -132
  45. data/lib/swarm_cli/options.rb +0 -151
  46. data/lib/swarm_cli/ui/components/agent_badge.rb +0 -33
  47. data/lib/swarm_cli/ui/components/content_block.rb +0 -120
  48. data/lib/swarm_cli/ui/components/divider.rb +0 -57
  49. data/lib/swarm_cli/ui/components/panel.rb +0 -62
  50. data/lib/swarm_cli/ui/components/usage_stats.rb +0 -70
  51. data/lib/swarm_cli/ui/formatters/cost.rb +0 -49
  52. data/lib/swarm_cli/ui/formatters/number.rb +0 -58
  53. data/lib/swarm_cli/ui/formatters/text.rb +0 -77
  54. data/lib/swarm_cli/ui/formatters/time.rb +0 -73
  55. data/lib/swarm_cli/ui/icons.rb +0 -36
  56. data/lib/swarm_cli/ui/renderers/event_renderer.rb +0 -188
  57. data/lib/swarm_cli/ui/state/agent_color_cache.rb +0 -45
  58. data/lib/swarm_cli/ui/state/depth_tracker.rb +0 -40
  59. data/lib/swarm_cli/ui/state/spinner_manager.rb +0 -170
  60. data/lib/swarm_cli/ui/state/usage_tracker.rb +0 -62
  61. data/lib/swarm_cli/version.rb +0 -5
  62. data/lib/swarm_cli.rb +0 -46
  63. data/lib/swarm_sdk/agent/RETRY_LOGIC.md +0 -127
  64. data/lib/swarm_sdk/agent/builder.rb +0 -552
  65. data/lib/swarm_sdk/agent/chat.rb +0 -774
  66. data/lib/swarm_sdk/agent/chat_helpers/context_tracker.rb +0 -268
  67. data/lib/swarm_sdk/agent/chat_helpers/event_emitter.rb +0 -204
  68. data/lib/swarm_sdk/agent/chat_helpers/hook_integration.rb +0 -480
  69. data/lib/swarm_sdk/agent/chat_helpers/instrumentation.rb +0 -78
  70. data/lib/swarm_sdk/agent/chat_helpers/llm_configuration.rb +0 -233
  71. data/lib/swarm_sdk/agent/chat_helpers/logging_helpers.rb +0 -116
  72. data/lib/swarm_sdk/agent/chat_helpers/serialization.rb +0 -83
  73. data/lib/swarm_sdk/agent/chat_helpers/system_reminder_injector.rb +0 -136
  74. data/lib/swarm_sdk/agent/chat_helpers/system_reminders.rb +0 -79
  75. data/lib/swarm_sdk/agent/chat_helpers/token_tracking.rb +0 -98
  76. data/lib/swarm_sdk/agent/context.rb +0 -116
  77. data/lib/swarm_sdk/agent/context_manager.rb +0 -315
  78. data/lib/swarm_sdk/agent/definition.rb +0 -477
  79. data/lib/swarm_sdk/agent/llm_instrumentation_middleware.rb +0 -182
  80. data/lib/swarm_sdk/agent/system_prompt_builder.rb +0 -161
  81. data/lib/swarm_sdk/builders/base_builder.rb +0 -409
  82. data/lib/swarm_sdk/claude_code_agent_adapter.rb +0 -205
  83. data/lib/swarm_sdk/concerns/cleanupable.rb +0 -39
  84. data/lib/swarm_sdk/concerns/snapshotable.rb +0 -67
  85. data/lib/swarm_sdk/concerns/validatable.rb +0 -55
  86. data/lib/swarm_sdk/configuration/parser.rb +0 -353
  87. data/lib/swarm_sdk/configuration/translator.rb +0 -255
  88. data/lib/swarm_sdk/configuration.rb +0 -135
  89. data/lib/swarm_sdk/context_compactor/metrics.rb +0 -147
  90. data/lib/swarm_sdk/context_compactor/token_counter.rb +0 -106
  91. data/lib/swarm_sdk/context_compactor.rb +0 -335
  92. data/lib/swarm_sdk/context_management/builder.rb +0 -128
  93. data/lib/swarm_sdk/context_management/context.rb +0 -328
  94. data/lib/swarm_sdk/defaults.rb +0 -196
  95. data/lib/swarm_sdk/events_to_messages.rb +0 -199
  96. data/lib/swarm_sdk/hooks/adapter.rb +0 -359
  97. data/lib/swarm_sdk/hooks/context.rb +0 -197
  98. data/lib/swarm_sdk/hooks/definition.rb +0 -80
  99. data/lib/swarm_sdk/hooks/error.rb +0 -29
  100. data/lib/swarm_sdk/hooks/executor.rb +0 -146
  101. data/lib/swarm_sdk/hooks/registry.rb +0 -147
  102. data/lib/swarm_sdk/hooks/result.rb +0 -150
  103. data/lib/swarm_sdk/hooks/shell_executor.rb +0 -255
  104. data/lib/swarm_sdk/hooks/tool_call.rb +0 -35
  105. data/lib/swarm_sdk/hooks/tool_result.rb +0 -62
  106. data/lib/swarm_sdk/log_collector.rb +0 -227
  107. data/lib/swarm_sdk/log_stream.rb +0 -127
  108. data/lib/swarm_sdk/markdown_parser.rb +0 -75
  109. data/lib/swarm_sdk/model_aliases.json +0 -8
  110. data/lib/swarm_sdk/models.json +0 -1
  111. data/lib/swarm_sdk/models.rb +0 -120
  112. data/lib/swarm_sdk/node_context.rb +0 -245
  113. data/lib/swarm_sdk/observer/builder.rb +0 -81
  114. data/lib/swarm_sdk/observer/config.rb +0 -45
  115. data/lib/swarm_sdk/observer/manager.rb +0 -236
  116. data/lib/swarm_sdk/patterns/agent_observer.rb +0 -160
  117. data/lib/swarm_sdk/permissions/config.rb +0 -239
  118. data/lib/swarm_sdk/permissions/error_formatter.rb +0 -121
  119. data/lib/swarm_sdk/permissions/path_matcher.rb +0 -35
  120. data/lib/swarm_sdk/permissions/validator.rb +0 -173
  121. data/lib/swarm_sdk/permissions_builder.rb +0 -122
  122. data/lib/swarm_sdk/plugin.rb +0 -309
  123. data/lib/swarm_sdk/plugin_registry.rb +0 -101
  124. data/lib/swarm_sdk/proc_helpers.rb +0 -53
  125. data/lib/swarm_sdk/prompts/base_system_prompt.md.erb +0 -117
  126. data/lib/swarm_sdk/restore_result.rb +0 -65
  127. data/lib/swarm_sdk/result.rb +0 -123
  128. data/lib/swarm_sdk/snapshot.rb +0 -156
  129. data/lib/swarm_sdk/snapshot_from_events.rb +0 -397
  130. data/lib/swarm_sdk/state_restorer.rb +0 -476
  131. data/lib/swarm_sdk/state_snapshot.rb +0 -334
  132. data/lib/swarm_sdk/swarm/agent_initializer.rb +0 -683
  133. data/lib/swarm_sdk/swarm/all_agents_builder.rb +0 -167
  134. data/lib/swarm_sdk/swarm/builder.rb +0 -249
  135. data/lib/swarm_sdk/swarm/executor.rb +0 -213
  136. data/lib/swarm_sdk/swarm/hook_triggers.rb +0 -150
  137. data/lib/swarm_sdk/swarm/logging_callbacks.rb +0 -340
  138. data/lib/swarm_sdk/swarm/mcp_configurator.rb +0 -154
  139. data/lib/swarm_sdk/swarm/swarm_registry_builder.rb +0 -67
  140. data/lib/swarm_sdk/swarm/tool_configurator.rb +0 -358
  141. data/lib/swarm_sdk/swarm.rb +0 -717
  142. data/lib/swarm_sdk/swarm_loader.rb +0 -145
  143. data/lib/swarm_sdk/swarm_registry.rb +0 -136
  144. data/lib/swarm_sdk/tools/bash.rb +0 -282
  145. data/lib/swarm_sdk/tools/clock.rb +0 -44
  146. data/lib/swarm_sdk/tools/delegate.rb +0 -267
  147. data/lib/swarm_sdk/tools/document_converters/base_converter.rb +0 -83
  148. data/lib/swarm_sdk/tools/document_converters/docx_converter.rb +0 -99
  149. data/lib/swarm_sdk/tools/document_converters/html_converter.rb +0 -101
  150. data/lib/swarm_sdk/tools/document_converters/pdf_converter.rb +0 -78
  151. data/lib/swarm_sdk/tools/document_converters/xlsx_converter.rb +0 -194
  152. data/lib/swarm_sdk/tools/edit.rb +0 -145
  153. data/lib/swarm_sdk/tools/glob.rb +0 -166
  154. data/lib/swarm_sdk/tools/grep.rb +0 -235
  155. data/lib/swarm_sdk/tools/image_extractors/docx_image_extractor.rb +0 -43
  156. data/lib/swarm_sdk/tools/image_extractors/pdf_image_extractor.rb +0 -163
  157. data/lib/swarm_sdk/tools/image_formats/tiff_builder.rb +0 -65
  158. data/lib/swarm_sdk/tools/multi_edit.rb +0 -236
  159. data/lib/swarm_sdk/tools/path_resolver.rb +0 -92
  160. data/lib/swarm_sdk/tools/read.rb +0 -261
  161. data/lib/swarm_sdk/tools/registry.rb +0 -205
  162. data/lib/swarm_sdk/tools/scratchpad/scratchpad_list.rb +0 -117
  163. data/lib/swarm_sdk/tools/scratchpad/scratchpad_read.rb +0 -97
  164. data/lib/swarm_sdk/tools/scratchpad/scratchpad_write.rb +0 -108
  165. data/lib/swarm_sdk/tools/stores/read_tracker.rb +0 -96
  166. data/lib/swarm_sdk/tools/stores/scratchpad_storage.rb +0 -272
  167. data/lib/swarm_sdk/tools/stores/storage.rb +0 -142
  168. data/lib/swarm_sdk/tools/stores/todo_manager.rb +0 -65
  169. data/lib/swarm_sdk/tools/think.rb +0 -98
  170. data/lib/swarm_sdk/tools/todo_write.rb +0 -235
  171. data/lib/swarm_sdk/tools/web_fetch.rb +0 -262
  172. data/lib/swarm_sdk/tools/write.rb +0 -112
  173. data/lib/swarm_sdk/utils.rb +0 -68
  174. data/lib/swarm_sdk/validation_result.rb +0 -33
  175. data/lib/swarm_sdk/version.rb +0 -5
  176. data/lib/swarm_sdk/workflow/agent_config.rb +0 -79
  177. data/lib/swarm_sdk/workflow/builder.rb +0 -143
  178. data/lib/swarm_sdk/workflow/executor.rb +0 -497
  179. data/lib/swarm_sdk/workflow/node_builder.rb +0 -555
  180. data/lib/swarm_sdk/workflow/transformer_executor.rb +0 -249
  181. data/lib/swarm_sdk/workflow.rb +0 -554
  182. data/lib/swarm_sdk.rb +0 -524
@@ -1,235 +0,0 @@
# frozen_string_literal: true

module SwarmSDK
  module Tools
    # Content-search tool that shells out to ripgrep (`rg`).
    #
    # Translates the keyword arguments of #execute into an `rg` argument
    # vector, runs it, and returns the result (or an error message) as a String.
    class Grep < RubyLLM::Tool
      include PathResolver

      # Factory hook: names the keyword arguments needed to instantiate this tool.
      def self.creation_requirements
        [:directory]
      end

      # @param directory [String] base directory; stored in expanded (absolute) form
      def initialize(directory:)
        super()
        @directory = File.expand_path(directory)
      end

      define_method(:name) { "Grep" }

      description <<~DESC
        A powerful search tool built on ripgrep

        Usage:
        - ALWAYS use Grep for search tasks. NEVER invoke `grep` or `rg` as a Bash command. The Grep tool has been optimized for correct permissions and access.
        - Supports full regex syntax (e.g., "log.*Error", "function\\s+\\w+")
        - Filter files with glob parameter (e.g., "*.js", "**/*.tsx") or type parameter (e.g., "js", "py", "rust")
        - Output modes: "content" shows matching lines, "files_with_matches" shows only file paths (default), "count" shows match counts
        - Use Task tool for open-ended searches requiring multiple rounds
        - Pattern syntax: Uses ripgrep (not grep) - literal braces need escaping (use `interface\\{\\}` to find `interface{}` in Go code)
        - Multiline matching: By default patterns match within single lines only. For cross-line patterns like `struct \\{[\\s\\S]*?field`, use `multiline: true`
      DESC

      param :pattern,
        type: "string",
        desc: "The regular expression pattern to search for in file contents",
        required: true

      param :path,
        type: "string",
        desc: "File or directory to search in (rg PATH). Defaults to current working directory.",
        required: false

      param :glob,
        type: "string",
        desc: "Glob pattern to filter files (e.g. \"*.js\", \"*.{ts,tsx}\") - maps to rg --glob",
        required: false

      param :type,
        type: "string",
        desc: "File type to search (rg --type). Common types: c, cpp, cs, csharp, css, dart, docker, dockercompose, elixir, erlang, go, graphql, haskell, html, java, js, json, kotlin, lua, make, markdown, md, php, py, python, ruby, rust, sass, scala, sh, sql, svelte, swift, tf, toml, ts, typescript, vim, vue, xml, yaml, zig",
        required: false

      param :output_mode,
        type: "string",
        desc: "Output mode: \"content\" shows matching lines (supports context/line number options), \"files_with_matches\" shows file paths (default), \"count\" shows match counts. All modes support head_limit.",
        required: false

      param :case_insensitive,
        type: "boolean",
        desc: "Case insensitive search (rg -i)",
        required: false

      param :multiline,
        type: "boolean",
        desc: "Enable multiline mode where . matches newlines and patterns can span lines (rg -U --multiline-dotall)",
        required: false

      param :context_before,
        type: "integer",
        desc: "Number of lines to show before each match (rg -B). Requires output_mode: \"content\", ignored otherwise.",
        required: false

      param :context_after,
        type: "integer",
        desc: "Number of lines to show after each match (rg -A). Requires output_mode: \"content\", ignored otherwise.",
        required: false

      param :context,
        type: "integer",
        desc: "Number of lines to show before and after each match (rg -C). Requires output_mode: \"content\", ignored otherwise.",
        required: false

      param :show_line_numbers,
        type: "boolean",
        desc: "Show line numbers in output (rg -n). Requires output_mode: \"content\", ignored otherwise.",
        required: false

      param :head_limit,
        type: "integer",
        desc: "Limit output to first N lines/entries, equivalent to \"| head -N\". Works across all output modes: content (limits output lines), files_with_matches (limits file paths), count (limits count entries). When unspecified, shows all results from ripgrep.",
        required: false

      # Run ripgrep with the given options and return its output as text.
      #
      # Never raises: validation problems, ripgrep failures and unexpected
      # exceptions are all turned into message strings.
      #
      # @return [String] search output, a "No matches found" notice, or an error message
      def execute(
        pattern:,
        path: nil,
        glob: nil,
        type: nil,
        output_mode: "files_with_matches",
        case_insensitive: false,
        multiline: false,
        context_before: nil,
        context_after: nil,
        context: nil,
        show_line_numbers: false,
        head_limit: nil
      )
        if pattern.nil? || pattern.empty?
          return validation_error("pattern is required")
        end

        # A blank path means "search the directory given at construction",
        # not the process's working directory.
        search_root = path.nil? || path.to_s.strip.empty? ? @directory : resolve_path(path)

        allowed_modes = ["content", "files_with_matches", "count"]
        unless allowed_modes.include?(output_mode)
          return validation_error("output_mode must be one of: #{allowed_modes.join(", ")}")
        end

        argv = ["rg"]

        case output_mode
        when "files_with_matches"
          argv.push("-l")
        when "count"
          argv.push("-c")
        when "content"
          # Line numbers and context flags are only passed in content mode.
          argv.push("-n") if show_line_numbers
          argv.push("-B", context_before.to_s) if context_before
          argv.push("-A", context_after.to_s) if context_after
          argv.push("-C", context.to_s) if context
        end

        argv.push("-i") if case_insensitive
        argv.push("-U", "--multiline-dotall") if multiline

        # Blank filters are dropped instead of being forwarded to rg.
        argv.push("--type", type) if type && !type.to_s.strip.empty?
        argv.push("--glob", glob) if glob && !glob.to_s.strip.empty?

        # The pattern goes through -e; the search root is the final argument.
        argv.push("-e", pattern, search_root)

        begin
          require "open3"

          stdout, stderr, status = Open3.capture3(*argv)
          exit_code = status.exitstatus
          stderr_present = !stderr.empty?

          # Exit code 1 with a silent stderr is treated as "nothing matched".
          return "No matches found for pattern: #{pattern}" if exit_code == 1 && !stderr_present

          # Exit code 2, or anything written to stderr, is reported as an error.
          return error("ripgrep error: #{stderr}") if exit_code == 2 || stderr_present

          result = stdout

          if head_limit && head_limit > 0
            all_lines = result.lines
            if all_lines.count > head_limit
              result = all_lines.take(head_limit).join
              result += "\n\n<system-reminder>Output limited to first #{head_limit} lines. Total results: #{all_lines.count} lines.</system-reminder>"
            end
          end

          reminder = build_usage_reminder(output_mode, pattern)
          result = "#{result}\n\n#{reminder}" unless reminder.empty?

          if result.empty?
            "No matches found for pattern: #{pattern}"
          else
            result
          end
        rescue Errno::ENOENT
          error("ripgrep (rg) is not installed or not in PATH. Please install ripgrep to use the Grep tool.")
        rescue Errno::EACCES
          error("Permission denied: Cannot search in '#{search_root}'")
        rescue StandardError => e
          error("Failed to execute search: #{e.class.name} - #{e.message}")
        end
      rescue StandardError => e
        error("Unexpected error during search: #{e.class.name} - #{e.message}")
      end

      private

      # Wrap an input-validation message in the tool-use error envelope.
      def validation_error(message)
        "<tool_use_error>InputValidationError: #{message}</tool_use_error>"
      end

      # Prefix a runtime failure message.
      def error(message)
        "Error: #{message}"
      end

      # Build the hint appended to non-"content" results; returns "" for
      # content mode. `pattern` is accepted but not used.
      def build_usage_reminder(output_mode, pattern)
        return "" if output_mode == "content"

        shown = output_mode == "files_with_matches" ? "file paths" : "match counts"

        <<~REMINDER
          <system-reminder>
          You used output_mode: '#{output_mode}' which only shows #{shown}.
          To see the actual matching lines and their content, use output_mode: 'content'.
          You can also add show_line_numbers: true and context lines (context_before, context_after, or context) for better context.
          </system-reminder>
        REMINDER
      end
    end
  end
end
@@ -1,43 +0,0 @@
# frozen_string_literal: true

module SwarmSDK
  module Tools
    module ImageExtractors
      # Extracts images from DOCX documents.
      # DOCX files are ZIP archives with images stored in word/media/.
      class DocxImageExtractor
        class << self
          # Extract all images from a DOCX document into a fresh temporary directory.
          #
          # Extraction is best-effort: any failure yields an empty array rather
          # than an exception. When nothing is returned (failure or no images),
          # the temporary directory is removed so it is not leaked; otherwise the
          # caller owns the directory containing the returned files.
          #
          # @param doc [Docx::Document] The DOCX document instance (must expose `zip`)
          # @param docx_path [String] Path to the DOCX file (used only to name the temp dir)
          # @return [Array<String>] Temporary file paths of the extracted images
          def extract_images(doc, docx_path)
            image_paths = []
            temp_dir = Dir.mktmpdir("docx_images_#{File.basename(docx_path, ".*")}")

            doc.zip.glob("word/media/*").each do |entry|
              next unless entry.file?

              # Only entries with a known image extension are extracted.
              next unless entry.name.match?(/\.(png|jpe?g|gif|bmp|tiff?)$/i)

              # basename keeps the write inside temp_dir whatever the entry path is.
              output_path = File.join(temp_dir, File.basename(entry.name))
              File.binwrite(output_path, doc.zip.read(entry.name))

              image_paths << output_path
            end

            # Nobody can reference an empty directory we never return.
            discard_temp_dir(temp_dir) if image_paths.empty?

            image_paths
          rescue StandardError
            # If image extraction fails, don't fail the entire document read,
            # but don't leave partially extracted files behind either.
            discard_temp_dir(temp_dir)
            []
          end

          private

          # Remove a temporary directory, ignoring any error while doing so.
          # @param dir [String, nil] directory to delete (nil when mktmpdir itself failed)
          # @return [void]
          def discard_temp_dir(dir)
            return if dir.nil?

            require "fileutils"
            FileUtils.remove_entry(dir) if File.directory?(dir)
          rescue StandardError
            nil
          end
        end
      end
    end
  end
end
@@ -1,163 +0,0 @@
# frozen_string_literal: true

module SwarmSDK
  module Tools
    module ImageExtractors
      # Extracts images from PDF documents.
      # Streams filtered with DCTDecode are written out as .jpg files;
      # FlateDecode, LZWDecode and unfiltered streams are written as TIFF
      # (DeviceRGB and DeviceGray color spaces only).
      class PdfImageExtractor
        class << self
          # Extract all images from a PDF document into a fresh temporary directory.
          #
          # Best-effort: any failure yields an empty array instead of raising.
          # When nothing is returned (failure or no images) the temporary
          # directory is removed so it is not leaked; otherwise the caller owns
          # the directory containing the returned files.
          #
          # @param reader [PDF::Reader] The PDF reader instance
          # @param pdf_path [String] Path to the PDF file (used only to name the temp dir)
          # @return [Array<String>] Temporary file paths of the extracted images
          def extract_images(reader, pdf_path)
            image_paths = []
            temp_dir = Dir.mktmpdir("pdf_images_#{File.basename(pdf_path, ".*")}")

            reader.pages.each_with_index do |page, page_index|
              image_paths.concat(extract_from_page(page, page_index + 1, temp_dir))
            end

            # Nobody can reference an empty directory we never return.
            discard_temp_dir(temp_dir) if image_paths.empty?

            image_paths
          rescue StandardError
            # If image extraction fails, don't fail the entire PDF read,
            # but don't leave partially extracted files behind either.
            discard_temp_dir(temp_dir)
            []
          end

          # Extract images from a single PDF page.
          #
          # @param page [PDF::Reader::Page] The PDF page
          # @param page_number [Integer] Page number (1-indexed), used in file names
          # @param temp_dir [String] Directory to save extracted images
          # @return [Array<String>] File paths of extracted images ([] on any error)
          def extract_from_page(page, page_number, temp_dir)
            extracted_files = []

            xobjects = page.xobjects
            return extracted_files if xobjects.empty?

            xobjects.each do |name, stream|
              # Only process Image XObjects (not Form XObjects)
              next unless stream.hash[:Subtype] == :Image

              file_path = save_image(stream, page_number, name, temp_dir)
              extracted_files << file_path if file_path
            end

            extracted_files
          rescue StandardError
            # If extraction fails for this page, continue with others
            []
          end

          # Save a PDF image stream to disk, choosing the format from its filter.
          #
          # @param stream [PDF::Reader::Stream] The image stream
          # @param page_number [Integer] Page number
          # @param name [Symbol] Image name from XObject
          # @param temp_dir [String] Directory to save the image
          # @return [String, nil] File path if saved; nil for unsupported filters or on error
          def save_image(stream, page_number, name, temp_dir)
            filter = stream.hash[:Filter]
            # /Filter may be a single name or an array of names; an empty or
            # one-element array means the same as nil or the bare name.
            filter = filter.first if filter.is_a?(Array) && filter.size <= 1

            case filter
            when :DCTDecode
              save_jpeg(stream, page_number, name, temp_dir)
            when :FlateDecode, :LZWDecode, nil
              save_as_tiff(stream, page_number, name, temp_dir)
            end
            # Any other filter (including multi-filter chains) falls through to nil
          rescue StandardError
            # If saving fails, skip this image
            nil
          end

          # Write the raw stream bytes of a DCTDecode image to a .jpg file.
          #
          # @param stream [PDF::Reader::Stream] The image stream
          # @param page_number [Integer] Page number
          # @param name [Symbol] Image name
          # @param temp_dir [String] Directory to save the image
          # @return [String] File path
          def save_jpeg(stream, page_number, name, temp_dir)
            filename = image_path(temp_dir, page_number, name, "jpg")
            # NOTE(review): assumes stream.data is a complete JPEG file for DCTDecode streams.
            File.binwrite(filename, stream.data)
            filename
          end

          # Save decoded image data as TIFF, dispatching on the color space.
          #
          # @param stream [PDF::Reader::Stream] The image stream
          # @param page_number [Integer] Page number
          # @param name [Symbol] Image name
          # @param temp_dir [String] Directory to save the image
          # @return [String, nil] File path if saved; nil for unsupported color spaces or on error
          def save_as_tiff(stream, page_number, name, temp_dir)
            case stream.hash[:ColorSpace]
            when :DeviceRGB
              save_rgb_tiff(stream, page_number, name, temp_dir)
            when :DeviceGray
              save_gray_tiff(stream, page_number, name, temp_dir)
            end
            # Any other color space falls through to nil
          rescue StandardError
            # If conversion fails, skip this image
            nil
          end

          # Save an RGB image as TIFF.
          #
          # @param stream [PDF::Reader::Stream] The image stream
          # @param page_number [Integer] Page number
          # @param name [Symbol] Image name
          # @param temp_dir [String] Directory to save the image
          # @return [String] File path
          def save_rgb_tiff(stream, page_number, name, temp_dir)
            write_tiff(stream, page_number, name, temp_dir, :build_rgb_header)
          end

          # Save a grayscale image as TIFF.
          #
          # @param stream [PDF::Reader::Stream] The image stream
          # @param page_number [Integer] Page number
          # @param name [Symbol] Image name
          # @param temp_dir [String] Directory to save the image
          # @return [String] File path
          def save_gray_tiff(stream, page_number, name, temp_dir)
            write_tiff(stream, page_number, name, temp_dir, :build_gray_header)
          end

          private

          # Build the output path for one image.
          # The XObject name comes from the PDF itself, so characters outside
          # [A-Za-z0-9_.-] are replaced; otherwise a name containing "/" or
          # "../" could direct the write outside temp_dir.
          def image_path(temp_dir, page_number, name, extension)
            safe_name = name.to_s.gsub(/[^A-Za-z0-9_.-]/, "_")
            File.join(temp_dir, "page-#{page_number}-#{safe_name}.#{extension}")
          end

          # Write a TIFF made of a TiffBuilder header followed by the stream's
          # unfiltered data. Bits per component defaults to 8 when absent.
          # @param header_builder [Symbol] TiffBuilder method that builds the header
          # @return [String] File path
          def write_tiff(stream, page_number, name, temp_dir, header_builder)
            filename = image_path(temp_dir, page_number, name, "tif")

            info = stream.hash
            bpc = info[:BitsPerComponent] || 8

            tiff = ImageFormats::TiffBuilder.public_send(header_builder, info[:Width], info[:Height], bpc)
            tiff << stream.unfiltered_data

            File.binwrite(filename, tiff)
            filename
          end

          # Remove a temporary directory, ignoring any error while doing so.
          # @param dir [String, nil] directory to delete (nil when mktmpdir itself failed)
          # @return [void]
          def discard_temp_dir(dir)
            return if dir.nil?

            require "fileutils"
            FileUtils.remove_entry(dir) if File.directory?(dir)
          rescue StandardError
            nil
          end
        end
      end
    end
  end
end
@@ -1,65 +0,0 @@
# frozen_string_literal: true

module SwarmSDK
  module Tools
    module ImageFormats
      # Builds uncompressed, single-strip, little-endian TIFF headers for raw
      # pixel data. Supports RGB and grayscale layouts. The caller appends the
      # pixel bytes directly after the returned header.
      #
      # All fields are packed with explicit little-endian directives ("v"/"V")
      # so the output matches the "II" byte-order mark on every host.
      class TiffBuilder
        class << self
          # Build the TIFF header for an RGB image (3 samples per pixel).
          #
          # Layout: 10-byte preamble, 8 IFD entries (12 bytes each), 4-byte
          # next-IFD pointer, then 12 bytes of BitsPerSample values.
          #
          # @param width [Integer] Image width in pixels
          # @param height [Integer] Image height in pixels
          # @param bpc [Integer] Bits per component (typically 8)
          # @return [String] Binary TIFF header (mutable)
          def build_rgb_header(width, height, bpc)
            tag_count = 8
            tiff = ifd_preamble(tag_count)

            # The three BitsPerSample values do not fit in the 4-byte value slot,
            # so they live right after the next-IFD pointer; pixels follow them.
            bits_offset = tiff.bytesize + (tag_count * 12) + 4
            pixel_offset = bits_offset + 12

            tiff << dimension_tag(256, width) # ImageWidth
            tiff << dimension_tag(257, height) # ImageHeight
            # NOTE(review): TIFF 6.0 lists BitsPerSample as SHORT; LONG is kept
            # here so the header stays byte-compatible with earlier output.
            tiff << long_tag(258, 3, bits_offset) # BitsPerSample (offset to values)
            tiff << short_tag(259, 1, 1) # Compression (none)
            tiff << short_tag(262, 1, 2) # PhotometricInterpretation (RGB)
            tiff << long_tag(273, 1, pixel_offset) # StripOffsets
            tiff << short_tag(277, 1, 3) # SamplesPerPixel
            tiff << long_tag(279, 1, strip_byte_count(width, height, 3, bpc)) # StripByteCounts
            tiff << [0].pack("V") # Next IFD pointer (none)
            tiff << [bpc, bpc, bpc].pack("V3") # BitsPerSample values
            tiff
          end

          # Build the TIFF header for a grayscale image (1 sample per pixel).
          #
          # Layout: 10-byte preamble, 8 IFD entries (12 bytes each), 4-byte
          # next-IFD pointer.
          #
          # @param width [Integer] Image width in pixels
          # @param height [Integer] Image height in pixels
          # @param bpc [Integer] Bits per component (typically 8)
          # @return [String] Binary TIFF header (mutable)
          def build_gray_header(width, height, bpc)
            tag_count = 8
            tiff = ifd_preamble(tag_count)
            pixel_offset = tiff.bytesize + (tag_count * 12) + 4

            tiff << dimension_tag(256, width) # ImageWidth
            tiff << dimension_tag(257, height) # ImageHeight
            tiff << short_tag(258, 1, bpc) # BitsPerSample
            tiff << short_tag(259, 1, 1) # Compression (none)
            tiff << short_tag(262, 1, 1) # PhotometricInterpretation (MinIsBlack)
            tiff << long_tag(273, 1, pixel_offset) # StripOffsets
            tiff << short_tag(277, 1, 1) # SamplesPerPixel
            tiff << long_tag(279, 1, strip_byte_count(width, height, 1, bpc)) # StripByteCounts
            tiff << [0].pack("V") # Next IFD pointer (none)
            tiff
          end

          private

          # "II", magic 42, first IFD at offset 8, followed by the IFD entry count.
          def ifd_preamble(tag_count)
            [73, 73, 42, 8, tag_count].pack("CCvVv")
          end

          # One 12-byte IFD entry of type SHORT (3); value sits in the low bytes of the slot.
          def short_tag(tag, count, value)
            [tag, 3, count, value].pack("vvVV")
          end

          # One 12-byte IFD entry of type LONG (4).
          def long_tag(tag, count, value)
            [tag, 4, count, value].pack("vvVV")
          end

          # ImageWidth/ImageHeight: SHORT when the value fits in 16 bits, LONG otherwise.
          def dimension_tag(tag, value)
            value > 0xFFFF ? long_tag(tag, 1, value) : short_tag(tag, 1, value)
          end

          # Byte size of the single strip, with each row padded to a whole byte.
          # Equals width * height * samples when bpc is 8.
          def strip_byte_count(width, height, samples, bpc)
            row_bytes = ((width * samples * bpc) + 7) / 8
            row_bytes * height
          end
        end
      end
    end
  end
end