swarm_memory 2.1.4 → 2.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. checksums.yaml +4 -4
  2. data/lib/swarm_memory/version.rb +1 -1
  3. data/lib/swarm_memory.rb +7 -2
  4. metadata +6 -185
  5. data/lib/claude_swarm/base_executor.rb +0 -133
  6. data/lib/claude_swarm/claude_code_executor.rb +0 -349
  7. data/lib/claude_swarm/claude_mcp_server.rb +0 -78
  8. data/lib/claude_swarm/cli.rb +0 -697
  9. data/lib/claude_swarm/commands/ps.rb +0 -215
  10. data/lib/claude_swarm/commands/show.rb +0 -139
  11. data/lib/claude_swarm/configuration.rb +0 -373
  12. data/lib/claude_swarm/hooks/session_start_hook.rb +0 -42
  13. data/lib/claude_swarm/json_handler.rb +0 -91
  14. data/lib/claude_swarm/mcp_generator.rb +0 -243
  15. data/lib/claude_swarm/openai/chat_completion.rb +0 -256
  16. data/lib/claude_swarm/openai/executor.rb +0 -256
  17. data/lib/claude_swarm/openai/responses.rb +0 -319
  18. data/lib/claude_swarm/orchestrator.rb +0 -878
  19. data/lib/claude_swarm/process_tracker.rb +0 -78
  20. data/lib/claude_swarm/session_cost_calculator.rb +0 -209
  21. data/lib/claude_swarm/session_path.rb +0 -42
  22. data/lib/claude_swarm/settings_generator.rb +0 -77
  23. data/lib/claude_swarm/system_utils.rb +0 -46
  24. data/lib/claude_swarm/templates/generation_prompt.md.erb +0 -230
  25. data/lib/claude_swarm/tools/reset_session_tool.rb +0 -24
  26. data/lib/claude_swarm/tools/session_info_tool.rb +0 -24
  27. data/lib/claude_swarm/tools/task_tool.rb +0 -63
  28. data/lib/claude_swarm/version.rb +0 -5
  29. data/lib/claude_swarm/worktree_manager.rb +0 -475
  30. data/lib/claude_swarm/yaml_loader.rb +0 -22
  31. data/lib/claude_swarm.rb +0 -67
  32. data/lib/swarm_cli/cli.rb +0 -201
  33. data/lib/swarm_cli/command_registry.rb +0 -61
  34. data/lib/swarm_cli/commands/mcp_serve.rb +0 -130
  35. data/lib/swarm_cli/commands/mcp_tools.rb +0 -148
  36. data/lib/swarm_cli/commands/migrate.rb +0 -55
  37. data/lib/swarm_cli/commands/run.rb +0 -173
  38. data/lib/swarm_cli/config_loader.rb +0 -98
  39. data/lib/swarm_cli/formatters/human_formatter.rb +0 -781
  40. data/lib/swarm_cli/formatters/json_formatter.rb +0 -51
  41. data/lib/swarm_cli/interactive_repl.rb +0 -924
  42. data/lib/swarm_cli/mcp_serve_options.rb +0 -44
  43. data/lib/swarm_cli/mcp_tools_options.rb +0 -59
  44. data/lib/swarm_cli/migrate_options.rb +0 -54
  45. data/lib/swarm_cli/migrator.rb +0 -132
  46. data/lib/swarm_cli/options.rb +0 -151
  47. data/lib/swarm_cli/ui/components/agent_badge.rb +0 -33
  48. data/lib/swarm_cli/ui/components/content_block.rb +0 -120
  49. data/lib/swarm_cli/ui/components/divider.rb +0 -57
  50. data/lib/swarm_cli/ui/components/panel.rb +0 -62
  51. data/lib/swarm_cli/ui/components/usage_stats.rb +0 -70
  52. data/lib/swarm_cli/ui/formatters/cost.rb +0 -49
  53. data/lib/swarm_cli/ui/formatters/number.rb +0 -58
  54. data/lib/swarm_cli/ui/formatters/text.rb +0 -77
  55. data/lib/swarm_cli/ui/formatters/time.rb +0 -73
  56. data/lib/swarm_cli/ui/icons.rb +0 -36
  57. data/lib/swarm_cli/ui/renderers/event_renderer.rb +0 -188
  58. data/lib/swarm_cli/ui/state/agent_color_cache.rb +0 -45
  59. data/lib/swarm_cli/ui/state/depth_tracker.rb +0 -40
  60. data/lib/swarm_cli/ui/state/spinner_manager.rb +0 -170
  61. data/lib/swarm_cli/ui/state/usage_tracker.rb +0 -62
  62. data/lib/swarm_cli/version.rb +0 -5
  63. data/lib/swarm_cli.rb +0 -46
  64. data/lib/swarm_sdk/agent/RETRY_LOGIC.md +0 -127
  65. data/lib/swarm_sdk/agent/builder.rb +0 -552
  66. data/lib/swarm_sdk/agent/chat.rb +0 -774
  67. data/lib/swarm_sdk/agent/chat_helpers/context_tracker.rb +0 -268
  68. data/lib/swarm_sdk/agent/chat_helpers/event_emitter.rb +0 -204
  69. data/lib/swarm_sdk/agent/chat_helpers/hook_integration.rb +0 -480
  70. data/lib/swarm_sdk/agent/chat_helpers/instrumentation.rb +0 -78
  71. data/lib/swarm_sdk/agent/chat_helpers/llm_configuration.rb +0 -233
  72. data/lib/swarm_sdk/agent/chat_helpers/logging_helpers.rb +0 -116
  73. data/lib/swarm_sdk/agent/chat_helpers/serialization.rb +0 -83
  74. data/lib/swarm_sdk/agent/chat_helpers/system_reminder_injector.rb +0 -136
  75. data/lib/swarm_sdk/agent/chat_helpers/system_reminders.rb +0 -79
  76. data/lib/swarm_sdk/agent/chat_helpers/token_tracking.rb +0 -98
  77. data/lib/swarm_sdk/agent/context.rb +0 -116
  78. data/lib/swarm_sdk/agent/context_manager.rb +0 -315
  79. data/lib/swarm_sdk/agent/definition.rb +0 -477
  80. data/lib/swarm_sdk/agent/llm_instrumentation_middleware.rb +0 -182
  81. data/lib/swarm_sdk/agent/system_prompt_builder.rb +0 -161
  82. data/lib/swarm_sdk/builders/base_builder.rb +0 -409
  83. data/lib/swarm_sdk/claude_code_agent_adapter.rb +0 -205
  84. data/lib/swarm_sdk/concerns/cleanupable.rb +0 -39
  85. data/lib/swarm_sdk/concerns/snapshotable.rb +0 -67
  86. data/lib/swarm_sdk/concerns/validatable.rb +0 -55
  87. data/lib/swarm_sdk/configuration/parser.rb +0 -353
  88. data/lib/swarm_sdk/configuration/translator.rb +0 -255
  89. data/lib/swarm_sdk/configuration.rb +0 -135
  90. data/lib/swarm_sdk/context_compactor/metrics.rb +0 -147
  91. data/lib/swarm_sdk/context_compactor/token_counter.rb +0 -106
  92. data/lib/swarm_sdk/context_compactor.rb +0 -335
  93. data/lib/swarm_sdk/context_management/builder.rb +0 -128
  94. data/lib/swarm_sdk/context_management/context.rb +0 -328
  95. data/lib/swarm_sdk/defaults.rb +0 -196
  96. data/lib/swarm_sdk/events_to_messages.rb +0 -199
  97. data/lib/swarm_sdk/hooks/adapter.rb +0 -359
  98. data/lib/swarm_sdk/hooks/context.rb +0 -197
  99. data/lib/swarm_sdk/hooks/definition.rb +0 -80
  100. data/lib/swarm_sdk/hooks/error.rb +0 -29
  101. data/lib/swarm_sdk/hooks/executor.rb +0 -146
  102. data/lib/swarm_sdk/hooks/registry.rb +0 -147
  103. data/lib/swarm_sdk/hooks/result.rb +0 -150
  104. data/lib/swarm_sdk/hooks/shell_executor.rb +0 -255
  105. data/lib/swarm_sdk/hooks/tool_call.rb +0 -35
  106. data/lib/swarm_sdk/hooks/tool_result.rb +0 -62
  107. data/lib/swarm_sdk/log_collector.rb +0 -227
  108. data/lib/swarm_sdk/log_stream.rb +0 -127
  109. data/lib/swarm_sdk/markdown_parser.rb +0 -75
  110. data/lib/swarm_sdk/model_aliases.json +0 -8
  111. data/lib/swarm_sdk/models.json +0 -1
  112. data/lib/swarm_sdk/models.rb +0 -120
  113. data/lib/swarm_sdk/node_context.rb +0 -245
  114. data/lib/swarm_sdk/observer/builder.rb +0 -81
  115. data/lib/swarm_sdk/observer/config.rb +0 -45
  116. data/lib/swarm_sdk/observer/manager.rb +0 -236
  117. data/lib/swarm_sdk/patterns/agent_observer.rb +0 -160
  118. data/lib/swarm_sdk/permissions/config.rb +0 -239
  119. data/lib/swarm_sdk/permissions/error_formatter.rb +0 -121
  120. data/lib/swarm_sdk/permissions/path_matcher.rb +0 -35
  121. data/lib/swarm_sdk/permissions/validator.rb +0 -173
  122. data/lib/swarm_sdk/permissions_builder.rb +0 -122
  123. data/lib/swarm_sdk/plugin.rb +0 -309
  124. data/lib/swarm_sdk/plugin_registry.rb +0 -101
  125. data/lib/swarm_sdk/proc_helpers.rb +0 -53
  126. data/lib/swarm_sdk/prompts/base_system_prompt.md.erb +0 -117
  127. data/lib/swarm_sdk/restore_result.rb +0 -65
  128. data/lib/swarm_sdk/result.rb +0 -123
  129. data/lib/swarm_sdk/snapshot.rb +0 -156
  130. data/lib/swarm_sdk/snapshot_from_events.rb +0 -397
  131. data/lib/swarm_sdk/state_restorer.rb +0 -476
  132. data/lib/swarm_sdk/state_snapshot.rb +0 -334
  133. data/lib/swarm_sdk/swarm/agent_initializer.rb +0 -683
  134. data/lib/swarm_sdk/swarm/all_agents_builder.rb +0 -167
  135. data/lib/swarm_sdk/swarm/builder.rb +0 -249
  136. data/lib/swarm_sdk/swarm/executor.rb +0 -213
  137. data/lib/swarm_sdk/swarm/hook_triggers.rb +0 -150
  138. data/lib/swarm_sdk/swarm/logging_callbacks.rb +0 -340
  139. data/lib/swarm_sdk/swarm/mcp_configurator.rb +0 -154
  140. data/lib/swarm_sdk/swarm/swarm_registry_builder.rb +0 -67
  141. data/lib/swarm_sdk/swarm/tool_configurator.rb +0 -358
  142. data/lib/swarm_sdk/swarm.rb +0 -717
  143. data/lib/swarm_sdk/swarm_loader.rb +0 -145
  144. data/lib/swarm_sdk/swarm_registry.rb +0 -136
  145. data/lib/swarm_sdk/tools/bash.rb +0 -282
  146. data/lib/swarm_sdk/tools/clock.rb +0 -44
  147. data/lib/swarm_sdk/tools/delegate.rb +0 -267
  148. data/lib/swarm_sdk/tools/document_converters/base_converter.rb +0 -83
  149. data/lib/swarm_sdk/tools/document_converters/docx_converter.rb +0 -99
  150. data/lib/swarm_sdk/tools/document_converters/html_converter.rb +0 -101
  151. data/lib/swarm_sdk/tools/document_converters/pdf_converter.rb +0 -78
  152. data/lib/swarm_sdk/tools/document_converters/xlsx_converter.rb +0 -194
  153. data/lib/swarm_sdk/tools/edit.rb +0 -145
  154. data/lib/swarm_sdk/tools/glob.rb +0 -166
  155. data/lib/swarm_sdk/tools/grep.rb +0 -235
  156. data/lib/swarm_sdk/tools/image_extractors/docx_image_extractor.rb +0 -43
  157. data/lib/swarm_sdk/tools/image_extractors/pdf_image_extractor.rb +0 -163
  158. data/lib/swarm_sdk/tools/image_formats/tiff_builder.rb +0 -65
  159. data/lib/swarm_sdk/tools/multi_edit.rb +0 -236
  160. data/lib/swarm_sdk/tools/path_resolver.rb +0 -92
  161. data/lib/swarm_sdk/tools/read.rb +0 -261
  162. data/lib/swarm_sdk/tools/registry.rb +0 -205
  163. data/lib/swarm_sdk/tools/scratchpad/scratchpad_list.rb +0 -117
  164. data/lib/swarm_sdk/tools/scratchpad/scratchpad_read.rb +0 -97
  165. data/lib/swarm_sdk/tools/scratchpad/scratchpad_write.rb +0 -108
  166. data/lib/swarm_sdk/tools/stores/read_tracker.rb +0 -96
  167. data/lib/swarm_sdk/tools/stores/scratchpad_storage.rb +0 -272
  168. data/lib/swarm_sdk/tools/stores/storage.rb +0 -142
  169. data/lib/swarm_sdk/tools/stores/todo_manager.rb +0 -65
  170. data/lib/swarm_sdk/tools/think.rb +0 -98
  171. data/lib/swarm_sdk/tools/todo_write.rb +0 -235
  172. data/lib/swarm_sdk/tools/web_fetch.rb +0 -262
  173. data/lib/swarm_sdk/tools/write.rb +0 -112
  174. data/lib/swarm_sdk/utils.rb +0 -68
  175. data/lib/swarm_sdk/validation_result.rb +0 -33
  176. data/lib/swarm_sdk/version.rb +0 -5
  177. data/lib/swarm_sdk/workflow/agent_config.rb +0 -79
  178. data/lib/swarm_sdk/workflow/builder.rb +0 -143
  179. data/lib/swarm_sdk/workflow/executor.rb +0 -497
  180. data/lib/swarm_sdk/workflow/node_builder.rb +0 -555
  181. data/lib/swarm_sdk/workflow/transformer_executor.rb +0 -249
  182. data/lib/swarm_sdk/workflow.rb +0 -554
  183. data/lib/swarm_sdk.rb +0 -524
  184. /data/lib/swarm_memory/{errors.rb → error.rb} +0 -0
@@ -1,235 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
module SwarmSDK
  module Tools
    # Content-search tool that shells out to ripgrep (`rg`).
    #
    # The tool is bound to one base directory at construction time. Searches
    # with no explicit path run against that directory; explicit paths are
    # passed through `resolve_path` (provided by PathResolver).
    class Grep < RubyLLM::Tool
      include PathResolver

      # Factory hook: names the keyword arguments needed to instantiate the tool.
      #
      # @return [Array<Symbol>]
      def self.creation_requirements
        [:directory]
      end

      # @param directory [String] base directory searches default to
      def initialize(directory:)
        super()
        @directory = File.expand_path(directory)
      end

      define_method(:name) { "Grep" }

      description <<~DESC
        A powerful search tool built on ripgrep

        Usage:
        - ALWAYS use Grep for search tasks. NEVER invoke `grep` or `rg` as a Bash command. The Grep tool has been optimized for correct permissions and access.
        - Supports full regex syntax (e.g., "log.*Error", "function\\s+\\w+")
        - Filter files with glob parameter (e.g., "*.js", "**/*.tsx") or type parameter (e.g., "js", "py", "rust")
        - Output modes: "content" shows matching lines, "files_with_matches" shows only file paths (default), "count" shows match counts
        - Use Task tool for open-ended searches requiring multiple rounds
        - Pattern syntax: Uses ripgrep (not grep) - literal braces need escaping (use `interface\\{\\}` to find `interface{}` in Go code)
        - Multiline matching: By default patterns match within single lines only. For cross-line patterns like `struct \\{[\\s\\S]*?field`, use `multiline: true`
      DESC

      param(
        :pattern,
        type: "string",
        desc: "The regular expression pattern to search for in file contents",
        required: true,
      )

      param(
        :path,
        type: "string",
        desc: "File or directory to search in (rg PATH). Defaults to current working directory.",
        required: false,
      )

      param(
        :glob,
        type: "string",
        desc: "Glob pattern to filter files (e.g. \"*.js\", \"*.{ts,tsx}\") - maps to rg --glob",
        required: false,
      )

      param(
        :type,
        type: "string",
        desc: "File type to search (rg --type). Common types: c, cpp, cs, csharp, css, dart, docker, dockercompose, elixir, erlang, go, graphql, haskell, html, java, js, json, kotlin, lua, make, markdown, md, php, py, python, ruby, rust, sass, scala, sh, sql, svelte, swift, tf, toml, ts, typescript, vim, vue, xml, yaml, zig",
        required: false,
      )

      param(
        :output_mode,
        type: "string",
        desc: "Output mode: \"content\" shows matching lines (supports context/line number options), \"files_with_matches\" shows file paths (default), \"count\" shows match counts. All modes support head_limit.",
        required: false,
      )

      param(
        :case_insensitive,
        type: "boolean",
        desc: "Case insensitive search (rg -i)",
        required: false,
      )

      param(
        :multiline,
        type: "boolean",
        desc: "Enable multiline mode where . matches newlines and patterns can span lines (rg -U --multiline-dotall)",
        required: false,
      )

      param(
        :context_before,
        type: "integer",
        desc: "Number of lines to show before each match (rg -B). Requires output_mode: \"content\", ignored otherwise.",
        required: false,
      )

      param(
        :context_after,
        type: "integer",
        desc: "Number of lines to show after each match (rg -A). Requires output_mode: \"content\", ignored otherwise.",
        required: false,
      )

      param(
        :context,
        type: "integer",
        desc: "Number of lines to show before and after each match (rg -C). Requires output_mode: \"content\", ignored otherwise.",
        required: false,
      )

      param(
        :show_line_numbers,
        type: "boolean",
        desc: "Show line numbers in output (rg -n). Requires output_mode: \"content\", ignored otherwise.",
        required: false,
      )

      param(
        :head_limit,
        type: "integer",
        desc: "Limit output to first N lines/entries, equivalent to \"| head -N\". Works across all output modes: content (limits output lines), files_with_matches (limits file paths), count (limits count entries). When unspecified, shows all results from ripgrep.",
        required: false,
      )

      # Run a ripgrep search and return its (optionally truncated) output.
      #
      # Never raises StandardError to the caller: validation problems, ripgrep
      # failures and unexpected exceptions are all reported as strings.
      #
      # @return [String] search output, a "No matches" notice, or an error message
      def execute(
        pattern:,
        path: nil,
        glob: nil,
        type: nil,
        output_mode: "files_with_matches",
        case_insensitive: false,
        multiline: false,
        context_before: nil,
        context_after: nil,
        context: nil,
        show_line_numbers: false,
        head_limit: nil
      )
        return validation_error("pattern is required") if pattern.nil? || pattern.empty?

        # A blank path means "search the agent's own directory", not the process cwd.
        path_given = !(path.nil? || path.to_s.strip.empty?)
        search_root = path_given ? resolve_path(path) : @directory

        allowed_modes = ["content", "files_with_matches", "count"]
        unless allowed_modes.include?(output_mode)
          return validation_error("output_mode must be one of: #{allowed_modes.join(", ")}")
        end

        argv = ripgrep_argv(
          pattern: pattern,
          search_root: search_root,
          glob: glob,
          type: type,
          output_mode: output_mode,
          case_insensitive: case_insensitive,
          multiline: multiline,
          context_before: context_before,
          context_after: context_after,
          context: context,
          show_line_numbers: show_line_numbers,
        )

        begin
          require "open3"

          stdout, stderr, status = Open3.capture3(*argv)

          # ripgrep exits 1 when nothing matched
          if status.exitstatus == 1 && stderr.empty?
            return "No matches found for pattern: #{pattern}"
          end

          # exit 2 (or anything written to stderr) is reported as an error
          if status.exitstatus == 2 || !stderr.empty?
            return error("ripgrep error: #{stderr}")
          end

          result = stdout

          if head_limit && head_limit > 0
            all_lines = result.lines
            total = all_lines.count
            if total > head_limit
              result = all_lines.take(head_limit).join
              result += "\n\n<system-reminder>Output limited to first #{head_limit} lines. Total results: #{total} lines.</system-reminder>"
            end
          end

          hint = build_usage_reminder(output_mode, pattern)
          result = "#{result}\n\n#{hint}" unless hint.empty?

          if result.empty?
            "No matches found for pattern: #{pattern}"
          else
            result
          end
        rescue Errno::ENOENT
          error("ripgrep (rg) is not installed or not in PATH. Please install ripgrep to use the Grep tool.")
        rescue Errno::EACCES
          error("Permission denied: Cannot search in '#{search_root}'")
        rescue StandardError => e
          error("Failed to execute search: #{e.class.name} - #{e.message}")
        end
      rescue StandardError => e
        error("Unexpected error during search: #{e.class.name} - #{e.message}")
      end

      private

      # Assemble the argv array handed to Open3 (no shell is involved).
      #
      # Context and line-number flags are only emitted in "content" mode.
      #
      # @return [Array] command name followed by its arguments
      def ripgrep_argv(pattern:, search_root:, glob:, type:, output_mode:, case_insensitive:,
        multiline:, context_before:, context_after:, context:, show_line_numbers:)
        argv = ["rg"]

        if output_mode == "files_with_matches"
          argv.push("-l")
        elsif output_mode == "count"
          argv.push("-c")
        elsif output_mode == "content"
          argv.push("-n") if show_line_numbers
          argv.push("-B", context_before.to_s) if context_before
          argv.push("-A", context_after.to_s) if context_after
          argv.push("-C", context.to_s) if context
        end

        argv.push("-i") if case_insensitive
        argv.push("-U", "--multiline-dotall") if multiline

        # Blank filters are dropped rather than forwarded to rg
        argv.push("--type", type) if type && !type.to_s.strip.empty?
        argv.push("--glob", glob) if glob && !glob.to_s.strip.empty?

        argv.push("-e", pattern)
        argv.push(search_root)
        argv
      end

      # Wrap an input-validation failure in the tool-error envelope.
      def validation_error(message)
        "<tool_use_error>InputValidationError: #{message}</tool_use_error>"
      end

      # Format a runtime failure message.
      def error(message)
        "Error: #{message}"
      end

      # Hint appended to non-"content" results explaining how to see matching lines.
      #
      # @return [String] empty string for "content" mode
      def build_usage_reminder(output_mode, pattern)
        return "" if output_mode == "content"

        shown = output_mode == "files_with_matches" ? "file paths" : "match counts"

        <<~REMINDER
          <system-reminder>
          You used output_mode: '#{output_mode}' which only shows #{shown}.
          To see the actual matching lines and their content, use output_mode: 'content'.
          You can also add show_line_numbers: true and context lines (context_before, context_after, or context) for better context.
          </system-reminder>
        REMINDER
      end
    end
  end
end
@@ -1,43 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
module SwarmSDK
  module Tools
    module ImageExtractors
      # Extracts embedded images from DOCX documents.
      #
      # Reads entries under word/media/ from the document's zip handle and
      # copies the ones with a recognised image extension into a fresh
      # temporary directory.
      class DocxImageExtractor
        # Extensions treated as images. Anchored with \z so a name that merely
        # contains an image extension before a newline does not match.
        IMAGE_EXTENSION = /\.(png|jpe?g|gif|bmp|tiff?)\z/i

        class << self
          # Extract all images from a DOCX document.
          #
          # Best-effort: any failure yields an empty array instead of raising,
          # so a broken image never fails the surrounding document read. The
          # temporary directory is removed when nothing is returned (failure or
          # no images), because the caller would have no path to clean up.
          #
          # @param doc [Docx::Document] document whose `zip` responds to `glob` and `read`
          # @param docx_path [String] path to the DOCX file (its basename is used in the temp dir name)
          # @return [Array<String>] paths of the extracted image files
          def extract_images(doc, docx_path)
            require "tmpdir"
            require "fileutils"

            temp_dir = Dir.mktmpdir("docx_images_#{File.basename(docx_path, ".*")}")
            image_paths = []

            doc.zip.glob("word/media/*").each do |entry|
              next unless entry.file?
              next unless entry.name.match?(IMAGE_EXTENSION)

              # basename only: never let an archive entry choose the target directory
              output_path = File.join(temp_dir, File.basename(entry.name))

              File.open(output_path, "wb") do |f|
                f.write(doc.zip.read(entry.name))
              end

              image_paths << output_path
            end

            # Nothing extracted: do not leave an empty directory behind
            FileUtils.rm_rf(temp_dir) if image_paths.empty?

            image_paths
          rescue StandardError
            # Partial output is unreachable for the caller, so discard it
            FileUtils.rm_rf(temp_dir) if temp_dir
            []
          end
        end
      end
    end
  end
end
@@ -1,163 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
module SwarmSDK
  module Tools
    module ImageExtractors
      # Extracts images from PDF documents.
      #
      # JPEG streams (DCTDecode) are written out unchanged. FlateDecode,
      # LZWDecode and unfiltered streams are wrapped in a TIFF header when
      # their colour space is DeviceRGB or DeviceGray. Everything else is
      # skipped.
      class PdfImageExtractor
        class << self
          # Extract all images from a PDF document.
          #
          # Best-effort: any failure yields an empty array instead of raising.
          # The temporary directory is removed when nothing is returned
          # (failure or no images), because the caller would have no path left
          # to clean up.
          #
          # @param reader [PDF::Reader] reader whose `pages` yields page objects
          # @param pdf_path [String] path to the PDF file (its basename is used in the temp dir name)
          # @return [Array<String>] paths of the extracted image files
          def extract_images(reader, pdf_path)
            require "tmpdir"
            require "fileutils"

            temp_dir = Dir.mktmpdir("pdf_images_#{File.basename(pdf_path, ".*")}")
            image_paths = []

            reader.pages.each_with_index do |page, page_index|
              image_paths.concat(extract_from_page(page, page_index + 1, temp_dir))
            end

            # Nothing usable was produced: drop the directory (and any orphaned files)
            FileUtils.rm_rf(temp_dir) if image_paths.empty?

            image_paths
          rescue StandardError
            # Don't fail the entire PDF read, but don't leak partial output either
            FileUtils.rm_rf(temp_dir) if temp_dir
            []
          end

          # Extract images from a single PDF page.
          #
          # @param page [PDF::Reader::Page] page whose `xobjects` maps names to streams
          # @param page_number [Integer] page number (1-indexed), used in file names
          # @param temp_dir [String] directory to save extracted images
          # @return [Array<String>] file paths for this page; empty if the page fails
          def extract_from_page(page, page_number, temp_dir)
            extracted_files = []

            xobjects = page.xobjects
            return extracted_files if xobjects.empty?

            xobjects.each do |name, stream|
              # Only Image XObjects are of interest (Form XObjects are skipped)
              next unless stream.hash[:Subtype] == :Image

              file_path = save_image(stream, page_number, name, temp_dir)
              extracted_files << file_path if file_path
            end

            extracted_files
          rescue StandardError
            # A failing page must not abort the remaining pages
            []
          end

          # Save one PDF image stream to disk, dispatching on its /Filter.
          #
          # /Filter may be a single name or an array of names. A one-element
          # array is treated like the bare name; longer filter chains are not
          # supported and are skipped.
          #
          # @param stream [PDF::Reader::Stream] the image stream
          # @param page_number [Integer] page number
          # @param name [Symbol] image name from the XObject dictionary
          # @param temp_dir [String] directory to save the image
          # @return [String, nil] file path, or nil when unsupported or on failure
          def save_image(stream, page_number, name, temp_dir)
            filter = stream.hash[:Filter]
            filter = filter.first if filter.is_a?(Array) && filter.size <= 1

            case filter
            when :DCTDecode
              save_jpeg(stream, page_number, name, temp_dir)
            when :FlateDecode, :LZWDecode, nil
              save_as_tiff(stream, page_number, name, temp_dir)
            end
            # Any other filter falls through and returns nil
          rescue StandardError
            nil
          end

          # Write a DCTDecode stream to a .jpg file without re-encoding.
          #
          # @param stream [PDF::Reader::Stream] the image stream
          # @param page_number [Integer] page number
          # @param name [Symbol] image name
          # @param temp_dir [String] directory to save the image
          # @return [String] file path
          def save_jpeg(stream, page_number, name, temp_dir)
            filename = File.join(temp_dir, "page-#{page_number}-#{name}.jpg")

            # stream.data is written as-is; it is expected to already be a JPEG file
            File.open(filename, "wb") do |file|
              file.write(stream.data)
            end

            filename
          end

          # Save raw pixel data as TIFF, dispatching on /ColorSpace.
          #
          # @param stream [PDF::Reader::Stream] the image stream
          # @param page_number [Integer] page number
          # @param name [Symbol] image name
          # @param temp_dir [String] directory to save the image
          # @return [String, nil] file path, or nil for other colour spaces or on failure
          def save_as_tiff(stream, page_number, name, temp_dir)
            case stream.hash[:ColorSpace]
            when :DeviceRGB
              save_rgb_tiff(stream, page_number, name, temp_dir)
            when :DeviceGray
              save_gray_tiff(stream, page_number, name, temp_dir)
            end
            # Any other colour space falls through and returns nil
          rescue StandardError
            nil
          end

          # Save an RGB image as TIFF.
          #
          # @param stream [PDF::Reader::Stream] the image stream
          # @param page_number [Integer] page number
          # @param name [Symbol] image name
          # @param temp_dir [String] directory to save the image
          # @return [String] file path
          def save_rgb_tiff(stream, page_number, name, temp_dir)
            filename = File.join(temp_dir, "page-#{page_number}-#{name}.tif")

            width = stream.hash[:Width]
            height = stream.hash[:Height]
            bpc = stream.hash[:BitsPerComponent] || 8

            tiff = ImageFormats::TiffBuilder.build_rgb_header(width, height, bpc)
            tiff << stream.unfiltered_data # decoded pixel data follows the header

            File.open(filename, "wb") { |file| file.write(tiff) }
            filename
          end

          # Save a grayscale image as TIFF.
          #
          # @param stream [PDF::Reader::Stream] the image stream
          # @param page_number [Integer] page number
          # @param name [Symbol] image name
          # @param temp_dir [String] directory to save the image
          # @return [String] file path
          def save_gray_tiff(stream, page_number, name, temp_dir)
            filename = File.join(temp_dir, "page-#{page_number}-#{name}.tif")

            width = stream.hash[:Width]
            height = stream.hash[:Height]
            bpc = stream.hash[:BitsPerComponent] || 8

            tiff = ImageFormats::TiffBuilder.build_gray_header(width, height, bpc)
            tiff << stream.unfiltered_data

            File.open(filename, "wb") { |file| file.write(tiff) }
            filename
          end
        end
      end
    end
  end
end
@@ -1,65 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
module SwarmSDK
  module Tools
    module ImageFormats
      # Builds headers for uncompressed, single-strip, little-endian TIFF files.
      #
      # The caller appends the raw pixel data directly after the returned
      # header; StripOffsets always points at the first byte after the header.
      # Supports RGB and grayscale colour spaces.
      class TiffBuilder
        # Number of IFD entries written by both header variants
        TAG_COUNT = 8
        # 8-byte file header + 2-byte IFD entry count
        PREAMBLE_SIZE = 10
        # Each IFD entry is tag(2) + type(2) + count(4) + value/offset(4)
        ENTRY_SIZE = 12
        # TIFF field type codes
        TYPE_SHORT = 3
        TYPE_LONG = 4

        class << self
          # Build a TIFF header for RGB images.
          #
          # @param width [Integer] image width in pixels
          # @param height [Integer] image height in pixels
          # @param bpc [Integer] bits per component (typically 8)
          # @return [String] binary TIFF header (122 bytes)
          def build_rgb_header(width, height, bpc)
            build_header(width, height, bpc, samples: 3, photometric: 2)
          end

          # Build a TIFF header for grayscale images.
          #
          # @param width [Integer] image width in pixels
          # @param height [Integer] image height in pixels
          # @param bpc [Integer] bits per component (typically 8)
          # @return [String] binary TIFF header (110 bytes)
          def build_gray_header(width, height, bpc)
            build_header(width, height, bpc, samples: 1, photometric: 1)
          end

          private

          # Assemble preamble, the eight IFD entries (ascending tag order), the
          # next-IFD pointer and, for multi-sample images, the out-of-line
          # BitsPerSample values.
          def build_header(width, height, bpc, samples:, photometric:)
            ifd_end = PREAMBLE_SIZE + (TAG_COUNT * ENTRY_SIZE) + 4

            if samples > 1
              # Several values do not fit in the 4-byte value field, so they are
              # stored after the IFD and referenced by offset (as 32-bit values).
              bits_values = ([bpc] * samples).pack("V*")
              bits_entry = ifd_entry(258, TYPE_LONG, samples, ifd_end)
            else
              bits_values = "".b
              bits_entry = ifd_entry(258, TYPE_SHORT, 1, bpc)
            end

            # Rows are padded to a whole number of bytes
            row_bytes = ((width * samples * bpc) + 7) / 8

            # "II" + magic 42 + offset of first IFD (8) + entry count, all little-endian
            tiff = [0x49, 0x49, 42, 8, TAG_COUNT].pack("CCvVv")
            tiff << dimension_entry(256, width)                                   # ImageWidth
            tiff << dimension_entry(257, height)                                  # ImageLength
            tiff << bits_entry                                                    # BitsPerSample
            tiff << ifd_entry(259, TYPE_SHORT, 1, 1)                              # Compression (none)
            tiff << ifd_entry(262, TYPE_SHORT, 1, photometric)                    # PhotometricInterpretation
            tiff << ifd_entry(273, TYPE_LONG, 1, ifd_end + bits_values.bytesize)  # StripOffsets
            tiff << ifd_entry(277, TYPE_SHORT, 1, samples)                        # SamplesPerPixel
            tiff << ifd_entry(279, TYPE_LONG, 1, row_bytes * height)              # StripByteCounts
            tiff << [0].pack("V")                                                 # Next IFD pointer (none)
            tiff << bits_values
            tiff
          end

          # Pack one 12-byte IFD entry in little-endian byte order. A SHORT
          # value packed through the 32-bit field lands left-justified, as
          # little-endian TIFF requires.
          def ifd_entry(tag, type, count, value)
            [tag, type, count, value].pack("vvVV")
          end

          # Width/height entry: SHORT when the value fits in 16 bits, LONG otherwise.
          def dimension_entry(tag, value)
            ifd_entry(tag, value > 0xFFFF ? TYPE_LONG : TYPE_SHORT, 1, value)
          end
        end
      end
    end
  end
end