openclacky 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +39 -0
  3. data/README.md +87 -53
  4. data/lib/clacky/agent/cost_tracker.rb +19 -2
  5. data/lib/clacky/agent/llm_caller.rb +218 -0
  6. data/lib/clacky/agent/message_compressor_helper.rb +32 -2
  7. data/lib/clacky/agent.rb +54 -22
  8. data/lib/clacky/client.rb +44 -5
  9. data/lib/clacky/default_parsers/pdf_parser.rb +58 -17
  10. data/lib/clacky/default_parsers/pdf_parser_ocr.py +103 -0
  11. data/lib/clacky/default_parsers/pdf_parser_plumber.py +62 -0
  12. data/lib/clacky/default_skills/deploy/SKILL.md +201 -77
  13. data/lib/clacky/default_skills/new/SKILL.md +3 -114
  14. data/lib/clacky/default_skills/onboard/SKILL.md +349 -133
  15. data/lib/clacky/default_skills/onboard/scripts/import_external_skills.rb +371 -0
  16. data/lib/clacky/default_skills/onboard/scripts/install_builtin_skills.rb +175 -0
  17. data/lib/clacky/default_skills/skill-add/scripts/install_from_zip.rb +59 -26
  18. data/lib/clacky/message_format/anthropic.rb +72 -8
  19. data/lib/clacky/message_format/bedrock.rb +6 -3
  20. data/lib/clacky/providers.rb +146 -3
  21. data/lib/clacky/server/channel/adapters/feishu/adapter.rb +14 -0
  22. data/lib/clacky/server/channel/adapters/feishu/bot.rb +10 -0
  23. data/lib/clacky/server/channel/adapters/feishu/message_parser.rb +1 -0
  24. data/lib/clacky/server/channel/channel_manager.rb +12 -4
  25. data/lib/clacky/server/channel/channel_ui_controller.rb +8 -2
  26. data/lib/clacky/server/http_server.rb +746 -13
  27. data/lib/clacky/server/session_registry.rb +55 -24
  28. data/lib/clacky/skill.rb +10 -9
  29. data/lib/clacky/skill_loader.rb +23 -11
  30. data/lib/clacky/tools/file_reader.rb +232 -127
  31. data/lib/clacky/tools/security.rb +42 -64
  32. data/lib/clacky/tools/terminal/persistent_session.rb +15 -4
  33. data/lib/clacky/tools/terminal/safe_rm.sh +106 -0
  34. data/lib/clacky/tools/terminal/session_manager.rb +8 -3
  35. data/lib/clacky/tools/terminal.rb +263 -16
  36. data/lib/clacky/ui2/layout_manager.rb +8 -1
  37. data/lib/clacky/ui2/output_buffer.rb +83 -23
  38. data/lib/clacky/ui2/ui_controller.rb +74 -7
  39. data/lib/clacky/utils/file_processor.rb +14 -40
  40. data/lib/clacky/utils/model_pricing.rb +215 -0
  41. data/lib/clacky/utils/parser_manager.rb +70 -6
  42. data/lib/clacky/utils/string_matcher.rb +23 -1
  43. data/lib/clacky/version.rb +1 -1
  44. data/lib/clacky/web/app.css +673 -9
  45. data/lib/clacky/web/app.js +40 -1608
  46. data/lib/clacky/web/i18n.js +209 -0
  47. data/lib/clacky/web/index.html +166 -2
  48. data/lib/clacky/web/onboard.js +77 -1
  49. data/lib/clacky/web/profile.js +442 -0
  50. data/lib/clacky/web/sessions.js +1034 -2
  51. data/lib/clacky/web/settings.js +127 -6
  52. data/lib/clacky/web/sidebar.js +39 -0
  53. data/lib/clacky/web/skills.js +460 -0
  54. data/lib/clacky/web/trash.js +343 -0
  55. data/lib/clacky/web/ws-dispatcher.js +255 -0
  56. data/lib/clacky.rb +5 -3
  57. metadata +16 -17
  58. data/lib/clacky/clacky_auth_client.rb +0 -152
  59. data/lib/clacky/clacky_cloud_config.rb +0 -123
  60. data/lib/clacky/cloud_project_client.rb +0 -169
  61. data/lib/clacky/default_skills/deploy/scripts/rails_deploy.rb +0 -1377
  62. data/lib/clacky/default_skills/deploy/tools/check_health.rb +0 -116
  63. data/lib/clacky/default_skills/deploy/tools/create_database_service.rb +0 -341
  64. data/lib/clacky/default_skills/deploy/tools/execute_deployment.rb +0 -99
  65. data/lib/clacky/default_skills/deploy/tools/fetch_runtime_logs.rb +0 -77
  66. data/lib/clacky/default_skills/deploy/tools/list_services.rb +0 -67
  67. data/lib/clacky/default_skills/deploy/tools/report_deploy_status.rb +0 -67
  68. data/lib/clacky/default_skills/deploy/tools/set_deploy_variables.rb +0 -189
  69. data/lib/clacky/default_skills/new/scripts/cloud_project_init.sh +0 -74
  70. data/lib/clacky/deploy_api_client.rb +0 -484
@@ -218,31 +218,62 @@ module Clacky
218
218
 
219
219
  ordered = pinned_section + non_pinned
220
220
 
221
- ordered.map do |s|
222
- id = s[:session_id]
223
- ls = live[id]
224
- {
225
- id: id,
226
- name: ls&.dig(:name) || s[:name] || "",
227
- status: ls ? ls[:status].to_s : "idle",
228
- error: ls ? ls[:error] : nil,
229
- model: ls&.dig(:model),
230
- source: s_source(s),
231
- agent_profile: (s[:agent_profile] || "general").to_s,
232
- working_dir: s[:working_dir],
233
- created_at: s[:created_at],
234
- updated_at: s[:updated_at],
235
- total_tasks: ls&.dig(:total_tasks) || s.dig(:stats, :total_tasks) || 0,
236
- total_cost: ls&.dig(:total_cost) || s.dig(:stats, :total_cost_usd) || 0.0,
237
- cost_source: (ls&.dig(:cost_source) || s.dig(:stats, :cost_source) || "estimated").to_s,
238
- # latest_latency is in-memory only (live sessions) — not persisted
239
- # at the session-level on disk. The on-disk source of truth is
240
- # per-assistant-message `latency` fields in messages[]. Reloaded
241
- # sessions start with nil and get populated on the next LLM call.
242
- latest_latency: ls&.dig(:latest_latency),
243
- pinned: s[:pinned] || false,
244
- }
221
+ ordered.map { |s| build_enriched_row(s, live[s[:session_id]]) }
222
+ end
223
+
224
+ # Return the same enriched hash that a `list` row would produce, for a
225
+ # single session — merging on-disk fields with in-memory live fields.
226
+ # Returns nil if the session is unknown on disk.
227
+ #
228
+ # This is the targeted, O(1) counterpart to `list` used by the WS layer
229
+ # when it only needs one row (e.g. pushing a fresh snapshot to a client
230
+ # that just (re)subscribed, or broadcasting a status-change update).
231
+ def snapshot(session_id)
232
+ return nil unless @session_manager
233
+ disk = @session_manager.load(session_id)
234
+ return nil unless disk
235
+
236
+ live = @mutex.synchronize do
237
+ s = @sessions[session_id]
238
+ next nil unless s
239
+ model_info = s[:agent]&.current_model_info
240
+ live_name = s[:agent]&.name
241
+ live_name = nil if live_name&.empty?
242
+ { status: s[:status], error: s[:error], model: model_info&.dig(:model),
243
+ name: live_name, total_tasks: s[:agent]&.total_tasks,
244
+ total_cost: s[:agent]&.total_cost, cost_source: s[:agent]&.cost_source,
245
+ latest_latency: s[:agent]&.latest_latency }
245
246
  end
247
+
248
+ build_enriched_row(disk, live)
249
+ end
250
+
251
+ # Merge a single disk-side session hash with the corresponding live
252
+ # in-memory agent fields (may be nil) into the row shape the frontend
253
+ # consumes.
254
+ private def build_enriched_row(s, ls)
255
+ id = s[:session_id]
256
+ {
257
+ id: id,
258
+ name: ls&.dig(:name) || s[:name] || "",
259
+ status: ls ? ls[:status].to_s : "idle",
260
+ error: ls ? ls[:error] : nil,
261
+ model: ls&.dig(:model),
262
+ source: s_source(s),
263
+ agent_profile: (s[:agent_profile] || "general").to_s,
264
+ working_dir: s[:working_dir],
265
+ created_at: s[:created_at],
266
+ updated_at: s[:updated_at],
267
+ total_tasks: ls&.dig(:total_tasks) || s.dig(:stats, :total_tasks) || 0,
268
+ total_cost: ls&.dig(:total_cost) || s.dig(:stats, :total_cost_usd) || 0.0,
269
+ cost_source: (ls&.dig(:cost_source) || s.dig(:stats, :cost_source) || "estimated").to_s,
270
+ # latest_latency is in-memory only (live sessions) — not persisted
271
+ # at the session-level on disk. The on-disk source of truth is
272
+ # per-assistant-message `latency` fields in messages[]. Reloaded
273
+ # sessions start with nil and get populated on the next LLM call.
274
+ latest_latency: ls&.dig(:latest_latency),
275
+ pinned: s[:pinned] || false,
276
+ }
246
277
  end
247
278
 
248
279
 
data/lib/clacky/skill.rb CHANGED
@@ -514,19 +514,20 @@ module Clacky
514
514
  @warnings << "Invalid name '#{@name}' in metadata; using directory name '#{dir_slug}' instead."
515
515
  @name = dir_slug
516
516
  else
517
- # Unrecoverable: both name and directory slug are invalid — mark skill as invalid
518
- @invalid = true
519
- @invalid_reason = "Invalid skill name '#{@name}' and directory name '#{dir_slug}' is also not a valid slug. " \
520
- "Expected lowercase letters, numbers, and hyphens (e.g. 'my-skill')."
521
- @name = nil
517
+ # Both name and directory slug are invalid (e.g. contains dots from version suffix).
518
+ # Record a warning but keep the skill usable — do not mark as invalid.
519
+ @warnings << "Invalid skill name '#{@name}' and directory name '#{dir_slug}' is also not a valid slug. " \
520
+ "Expected lowercase letters, numbers, and hyphens (e.g. 'my-skill')."
521
+ @name = dir_slug
522
522
  end
523
523
  end
524
524
  else
525
- # No name in frontmatter — check the directory slug itself
525
+ # No name in frontmatter — check the directory slug itself.
526
+ # Non-conforming names (e.g. version-suffixed dirs like "test-runner-1.0.0")
527
+ # are allowed with a warning rather than being rejected outright.
526
528
  unless valid_slug.call(dir_slug)
527
- @invalid = true
528
- @invalid_reason = "Directory name '#{dir_slug}' is not a valid skill slug. " \
529
- "Expected lowercase letters, numbers, and hyphens (e.g. 'my-skill')."
529
+ @warnings << "Directory name '#{dir_slug}' is not a valid skill slug. " \
530
+ "Expected lowercase letters, numbers, and hyphens (e.g. 'my-skill')."
530
531
  end
531
532
  end
532
533
 
@@ -287,20 +287,32 @@ module Clacky
287
287
  def load_skills_from_directory(dir, source_type)
288
288
  return [] unless dir.exist?
289
289
 
290
+ source_path = case source_type
291
+ when :global_clacky
292
+ Pathname.new(ENV.fetch("HOME", "~")).join(".clacky")
293
+ when :project_clacky
294
+ Pathname.new(@working_dir)
295
+ else
296
+ dir
297
+ end
298
+
290
299
  skills = []
291
- dir.children.select(&:directory?).each do |skill_dir|
292
- source_path = case source_type
293
- when :global_clacky
294
- Pathname.new(ENV.fetch("HOME", "~")).join(".clacky")
295
- when :project_clacky
296
- Pathname.new(@working_dir)
300
+ dir.children.select(&:directory?).each do |entry|
301
+ if entry.join("SKILL.md").exist?
302
+ # Direct skill directory
303
+ skill = load_single_skill(entry, source_path, entry.basename.to_s, source_type)
304
+ skills << skill if skill
297
305
  else
298
- skill_dir
306
+ # Treat as a category directory — scan one level deeper for skills.
307
+ # This allows grouping skills under ~/.clacky/skills/<category>/<skill>/SKILL.md
308
+ # (e.g. openclaw-imports/my-skill/SKILL.md) without changing the loader contract.
309
+ entry.children.select(&:directory?).each do |skill_dir|
310
+ next unless skill_dir.join("SKILL.md").exist?
311
+
312
+ skill = load_single_skill(skill_dir, source_path, skill_dir.basename.to_s, source_type)
313
+ skills << skill if skill
314
+ end
299
315
  end
300
-
301
- skill_name = skill_dir.basename.to_s
302
- skill = load_single_skill(skill_dir, source_path, skill_name, source_type)
303
- skills << skill if skill
304
316
  end
305
317
  skills
306
318
  end
@@ -7,7 +7,7 @@ module Clacky
7
7
  module Tools
8
8
  class FileReader < Base
9
9
  self.tool_name = "file_reader"
10
- self.tool_description = "Read contents of a file from the filesystem. Supports text files, images (PNG/JPG/GIF/WEBP), and documents (PDF/DOCX/XLSX/PPTX)."
10
+ self.tool_description = "Read contents of a file from the filesystem. Supports text files, images (PNG/JPG/GIF/WEBP), and documents (PDF/DOCX/XLSX/PPTX — auto-converted to text via parsers, with OCR fallback for scanned PDFs)."
11
11
  self.tool_category = "file_system"
12
12
  self.tool_parameters = {
13
13
  type: "object",
@@ -39,7 +39,7 @@ module Clacky
39
39
  MAX_TEXT_FILE_SIZE = 1 * 1024 * 1024
40
40
 
41
41
  # Maximum content size to return (~10,000 tokens = ~40,000 characters)
42
- MAX_CONTENT_CHARS = 80_000
42
+ MAX_CONTENT_CHARS = 60_000
43
43
 
44
44
  # Maximum characters per line (prevent single huge lines from bloating tokens)
45
45
  MAX_LINE_CHARS = 1000
@@ -70,103 +70,178 @@ module Clacky
70
70
  end
71
71
 
72
72
  begin
73
- # Check if file is binary
74
- if Utils::FileProcessor.binary_file_path?(expanded_path)
75
- return handle_binary_file(expanded_path)
76
- end
73
+ # Delegate to FileProcessor for file type dispatch. FileProcessor is
74
+ # the single source of truth for how a file becomes a readable form
75
+ # (parser-extracted text, image base64, archive listing, plain text).
76
+ # FileReader here only shapes the result for the LLM.
77
+ ref = Utils::FileProcessor.process_path(expanded_path)
78
+
79
+ case ref.type
80
+ when :image
81
+ # Images go to LLM as base64 via the image_inject sidecar channel.
82
+ return handle_image_file(expanded_path)
83
+
84
+ when :pdf, :document, :spreadsheet, :presentation
85
+ # Parser-backed document formats. FileProcessor has already
86
+ # produced a preview markdown file (or set parse_error on failure).
87
+ if ref.preview_path && File.exist?(ref.preview_path)
88
+ return read_text_file(
89
+ expanded_path,
90
+ max_lines: max_lines,
91
+ start_line: start_line,
92
+ end_line: end_line,
93
+ source_path: ref.preview_path,
94
+ parsed_from: ref.type
95
+ )
96
+ else
97
+ return build_parser_failure_result(expanded_path, ref)
98
+ end
77
99
 
78
- # Check text file size (only for non-binary files)
79
- file_size = File.size(expanded_path)
80
- if file_size > MAX_TEXT_FILE_SIZE
81
- return {
82
- path: expanded_path,
83
- content: nil,
84
- size_bytes: file_size,
85
- error: "Text file too large: #{format_file_size(file_size)} (max: #{format_file_size(MAX_TEXT_FILE_SIZE)}). Please use grep tool to search within this file instead."
86
- }
87
- end
100
+ when :text, :csv, :zip
101
+ # FileProcessor already produced a preview (raw text copy for
102
+ # text/csv, archive listing for zip/tar). Read the preview with
103
+ # normal line-range + truncation rules.
104
+ source = (ref.preview_path && File.exist?(ref.preview_path)) ? ref.preview_path : expanded_path
105
+ return read_text_file(
106
+ expanded_path,
107
+ max_lines: max_lines,
108
+ start_line: start_line,
109
+ end_line: end_line,
110
+ source_path: source
111
+ )
88
112
 
89
- # Read text file with optional line range.
90
- # Scrub invalid UTF-8 bytes (e.g. GBK-encoded files) so downstream
91
- # JSON.generate / history persistence won't blow up later.
92
- all_lines = File.readlines(expanded_path).map! { |line| safe_utf8(line) }
93
- total_lines = all_lines.size
94
-
95
- # Calculate start index (convert 1-indexed to 0-indexed)
96
- start_idx = start_line ? [start_line - 1, 0].max : 0
97
-
98
- # Calculate end index based on parameters
99
- if end_line
100
- # User specified end_line directly
101
- end_idx = [end_line - 1, total_lines - 1].min
102
- elsif start_line
103
- # start_line + max_lines - 1 (relative to start_line, inclusive)
104
- calculated_end_line = start_line + max_lines - 1
105
- end_idx = [calculated_end_line - 1, total_lines - 1].min
106
113
  else
107
- # Read from beginning with max_lines limit
108
- end_idx = [max_lines - 1, total_lines - 1].min
109
- end
110
-
111
- # Check if start_line exceeds file length first
112
- if start_idx >= total_lines
113
- return {
114
- path: expanded_path,
115
- content: nil,
116
- lines_read: 0,
117
- error: "Invalid line range: start_line #{start_line} exceeds total lines (#{total_lines})"
118
- }
119
- end
120
-
121
- # Validate range
122
- if start_idx > end_idx
123
- return {
124
- path: expanded_path,
125
- content: nil,
126
- lines_read: 0,
127
- error: "Invalid line range: start_line #{start_line} > end_line #{end_line || (start_line + max_lines)}"
128
- }
129
- end
130
-
131
- lines = all_lines[start_idx..end_idx] || []
132
-
133
- # Truncate individual lines that are too long
134
- lines = lines.map do |line|
135
- if line.length > MAX_LINE_CHARS
136
- line[0...MAX_LINE_CHARS] + "... [Line truncated - #{line.length} chars]\n"
137
- else
138
- line
114
+ # Unknown / :file could be an unrecognised source file, a binary
115
+ # blob, or anything else. Fall back to:
116
+ # 1. If FileProcessor.binary_file_path? says it's binary → report unsupported.
117
+ # 2. Otherwise → read as plain text (covers .rb, .py, .js, .log, etc.).
118
+ if Utils::FileProcessor.binary_file_path?(expanded_path)
119
+ return handle_unsupported_binary(expanded_path, ref)
139
120
  end
140
- end
141
-
142
- content = lines.join
143
- truncated = end_idx < (total_lines - 1)
144
121
 
145
- # Truncate total content if it exceeds maximum size
146
- if content.length > MAX_CONTENT_CHARS
147
- content = content[0...MAX_CONTENT_CHARS] +
148
- "\n\n[Content truncated - exceeded #{MAX_CONTENT_CHARS} characters (~10,000 tokens)]" +
149
- "\nUse start_line/end_line parameters to read specific sections, or grep tool to search for keywords."
150
- truncated = true
122
+ return read_text_file(
123
+ expanded_path,
124
+ max_lines: max_lines,
125
+ start_line: start_line,
126
+ end_line: end_line
127
+ )
151
128
  end
152
-
129
+ rescue StandardError => e
153
130
  {
154
131
  path: expanded_path,
155
- content: content,
156
- lines_read: lines.size,
157
- total_lines: total_lines,
158
- truncated: truncated,
132
+ content: nil,
133
+ error: "Error reading file: #{e.message}"
134
+ }
135
+ end
136
+ end
137
+
138
+ # Read a plain-text file with line-range selection and token-budget
139
+ # truncation. The source of the text can be:
140
+ # - the original file itself (source_path == expanded_path)
141
+ # - a parser-generated preview.md for documents (source_path = ref.preview_path)
142
+ # The reported `path` is always the original file so the LLM sees a
143
+ # consistent identity.
144
+ private def read_text_file(display_path, max_lines:, start_line:, end_line:, source_path: nil, parsed_from: nil)
145
+ source_path ||= display_path
146
+
147
+ file_size = File.size(source_path)
148
+ if file_size > MAX_TEXT_FILE_SIZE
149
+ return {
150
+ path: display_path,
151
+ content: nil,
152
+ size_bytes: file_size,
153
+ error: "Text file too large: #{format_file_size(file_size)} (max: #{format_file_size(MAX_TEXT_FILE_SIZE)}). Please use grep tool to search within this file instead."
154
+ }
155
+ end
156
+
157
+ # Read text file with optional line range.
158
+ # Scrub invalid UTF-8 bytes (e.g. GBK-encoded files) so downstream
159
+ # JSON.generate / history persistence won't blow up later.
160
+ all_lines = File.readlines(source_path).map! { |line| safe_utf8(line) }
161
+ total_lines = all_lines.size
162
+
163
+ # Calculate start index (convert 1-indexed to 0-indexed)
164
+ start_idx = start_line ? [start_line - 1, 0].max : 0
165
+
166
+ # Calculate end index based on parameters
167
+ if end_line
168
+ end_idx = [end_line - 1, total_lines - 1].min
169
+ elsif start_line
170
+ calculated_end_line = start_line + max_lines - 1
171
+ end_idx = [calculated_end_line - 1, total_lines - 1].min
172
+ else
173
+ end_idx = [max_lines - 1, total_lines - 1].min
174
+ end
175
+
176
+ if total_lines == 0
177
+ return {
178
+ path: display_path,
179
+ content: "",
180
+ lines_read: 0,
181
+ total_lines: 0,
182
+ truncated: false,
159
183
  start_line: start_line,
160
184
  end_line: end_line,
185
+ parsed_from: parsed_from&.to_s,
186
+ source_path: (source_path != display_path ? source_path : nil),
161
187
  error: nil
162
188
  }
163
- rescue StandardError => e
164
- {
165
- path: expanded_path,
189
+ end
190
+
191
+ # Check if start_line exceeds file length first
192
+ if start_idx >= total_lines
193
+ return {
194
+ path: display_path,
166
195
  content: nil,
167
- error: "Error reading file: #{e.message}"
196
+ lines_read: 0,
197
+ error: "Invalid line range: start_line #{start_line} exceeds total lines (#{total_lines})"
198
+ }
199
+ end
200
+
201
+ # Validate range
202
+ if start_idx > end_idx
203
+ return {
204
+ path: display_path,
205
+ content: nil,
206
+ lines_read: 0,
207
+ error: "Invalid line range: start_line #{start_line} > end_line #{end_line || (start_line + max_lines)}"
168
208
  }
169
209
  end
210
+
211
+ lines = all_lines[start_idx..end_idx] || []
212
+
213
+ # Truncate individual lines that are too long
214
+ lines = lines.map do |line|
215
+ if line.length > MAX_LINE_CHARS
216
+ line[0...MAX_LINE_CHARS] + "... [Line truncated - #{line.length} chars]\n"
217
+ else
218
+ line
219
+ end
220
+ end
221
+
222
+ content = lines.join
223
+ truncated = end_idx < (total_lines - 1)
224
+
225
+ # Truncate total content if it exceeds maximum size
226
+ if content.length > MAX_CONTENT_CHARS
227
+ content = content[0...MAX_CONTENT_CHARS] +
228
+ "\n\n[Content truncated - exceeded #{MAX_CONTENT_CHARS} characters (~10,000 tokens)]" +
229
+ "\nUse start_line/end_line parameters to read specific sections, or grep tool to search for keywords."
230
+ truncated = true
231
+ end
232
+
233
+ {
234
+ path: display_path,
235
+ content: content,
236
+ lines_read: lines.size,
237
+ total_lines: total_lines,
238
+ truncated: truncated,
239
+ start_line: start_line,
240
+ end_line: end_line,
241
+ parsed_from: parsed_from&.to_s,
242
+ source_path: (source_path != display_path ? source_path : nil),
243
+ error: nil
244
+ }
170
245
  end
171
246
 
172
247
  def format_call(args)
@@ -199,21 +274,22 @@ module Clacky
199
274
  end
200
275
  end
201
276
 
202
- # Handle text file reading
277
+ # Handle text file reading (including parser-extracted documents)
203
278
  lines = result[:lines_read] || result['lines_read'] || 0
204
279
  truncated = result[:truncated] || result['truncated']
205
- "Read #{lines} lines#{truncated ? ' (truncated)' : ''}"
280
+ parsed_from = result[:parsed_from] || result['parsed_from']
281
+ suffix = parsed_from ? " (from #{parsed_from})" : ""
282
+ "Read #{lines} lines#{suffix}#{truncated ? ' (truncated)' : ''}"
206
283
  end
207
284
 
208
- # Format result for LLM - handles both text and binary (image/PDF) content
285
+ # Format result for LLM - handles both text and binary (image) content
209
286
  # This method is called by the agent to format tool results before sending to LLM
210
287
  def format_result_for_llm(result)
211
- # For LLM-compatible binary files with base64 data, return as content blocks
288
+ # For LLM-compatible binary files with base64 data (images only — documents
289
+ # are converted to text upstream via FileProcessor parsers).
212
290
  if result[:binary] && result[:base64_data]
213
- # Create a text description
214
291
  description = "File: #{result[:path]}\nType: #{result[:format]}\nSize: #{format_file_size(result[:size_bytes])}"
215
292
 
216
- # Add size warning for large files
217
293
  if result[:size_bytes] > Utils::FileProcessor::MAX_FILE_SIZE
218
294
  description += "\nWARNING: Large file (>#{Utils::FileProcessor::MAX_FILE_SIZE / 1024}KB) - may consume significant tokens"
219
295
  end
@@ -229,8 +305,6 @@ module Clacky
229
305
  #
230
306
  # The agent detects `:image_inject` in the tool result after observe() and
231
307
  # appends a `role: "user"` system_injected message containing the image block.
232
- # This matches the standard workaround used by OpenAI's own agent SDK and
233
- # pydantic-ai for multimodal tool outputs.
234
308
  if result[:mime_type]&.start_with?("image/")
235
309
  return {
236
310
  type: "text",
@@ -243,14 +317,14 @@ module Clacky
243
317
  }
244
318
  end
245
319
 
246
- # For PDFs and other binary formats, just return metadata with base64
320
+ # No non-image binary type should reach here anymore — documents now
321
+ # go through the parser + text path. Keep this as a defensive fallback.
247
322
  return {
248
323
  type: "document",
249
324
  path: result[:path],
250
325
  format: result[:format],
251
326
  size_bytes: result[:size_bytes],
252
327
  mime_type: result[:mime_type],
253
- base64_data: result[:base64_data],
254
328
  description: description
255
329
  }
256
330
  end
@@ -261,45 +335,35 @@ module Clacky
261
335
  # For directory listings, return as-is (no raw file content to preserve)
262
336
  return result if result[:is_directory]
263
337
 
264
- # For plain text files: return a plain string so the agent sends it
265
- # directly to the LLM without JSON-encoding (avoids \" / \n escaping).
338
+ # For plain text files (and parser-extracted documents): return a plain
339
+ # string so the agent sends it directly to the LLM without JSON-encoding
340
+ # (avoids \" / \n escaping).
266
341
  header = "File: #{result[:path]}"
342
+ if result[:parsed_from]
343
+ header += " [extracted from #{result[:parsed_from]}]"
344
+ end
267
345
  header += " (lines #{result[:start_line]}-#{result[:end_line]})" if result[:start_line]
268
346
  header += " [#{result[:lines_read]}/#{result[:total_lines]} lines]"
269
347
  header += " [TRUNCATED]" if result[:truncated]
270
348
  "#{header}\n\n#{result[:content]}"
271
349
  end
272
350
 
273
- private def handle_binary_file(path)
274
- # Check if it's a supported format using FileProcessor
275
- if Utils::FileProcessor.supported_binary_file?(path)
276
- # Use FileProcessor to convert to base64
277
- begin
278
- result = Utils::FileProcessor.file_to_base64(path)
279
- {
280
- path: path,
281
- binary: true,
282
- format: result[:format],
283
- mime_type: result[:mime_type],
284
- size_bytes: result[:size_bytes],
285
- base64_data: result[:base64_data],
286
- error: nil
287
- }
288
- rescue ArgumentError => e
289
- # File too large or other error
290
- file_size = File.size(path)
291
- ext = File.extname(path).downcase
292
- {
293
- path: path,
294
- binary: true,
295
- format: ext.empty? ? "unknown" : ext[1..-1],
296
- size_bytes: file_size,
297
- content: nil,
298
- error: e.message
299
- }
300
- end
301
- else
302
- # Binary file that we can't send to LLM
351
+ # Handle an image file: convert to base64 and return an LLM-ready result
352
+ # with the image_inject sidecar. Used by execute() for :image type files.
353
+ private def handle_image_file(path)
354
+ begin
355
+ result = Utils::FileProcessor.file_to_base64(path)
356
+ {
357
+ path: path,
358
+ binary: true,
359
+ format: result[:format],
360
+ mime_type: result[:mime_type],
361
+ size_bytes: result[:size_bytes],
362
+ base64_data: result[:base64_data],
363
+ error: nil
364
+ }
365
+ rescue ArgumentError => e
366
+ # File too large or unreadable
303
367
  file_size = File.size(path)
304
368
  ext = File.extname(path).downcase
305
369
  {
@@ -308,11 +372,52 @@ module Clacky
308
372
  format: ext.empty? ? "unknown" : ext[1..-1],
309
373
  size_bytes: file_size,
310
374
  content: nil,
311
- error: "Binary file detected. This format cannot be read as text. File size: #{format_file_size(file_size)}"
375
+ error: e.message
312
376
  }
313
377
  end
314
378
  end
315
379
 
380
+ # Handle an unsupported binary file (no parser available, not an image).
381
+ # Returns a clear error message so the LLM knows it needs a different approach.
382
+ private def handle_unsupported_binary(path, ref = nil)
383
+ file_size = File.size(path)
384
+ ext = File.extname(path).downcase
385
+ {
386
+ path: path,
387
+ binary: true,
388
+ format: ext.empty? ? "unknown" : ext[1..-1],
389
+ size_bytes: file_size,
390
+ content: nil,
391
+ error: "Binary file detected. This format cannot be read as text. File size: #{format_file_size(file_size)}"
392
+ }
393
+ end
394
+
395
+ # Build an error result when the parser for a supported document format
396
+ # failed. The LLM receives the parser path so it can fix and retry, matching
397
+ # the behaviour of the file-upload pipeline (agent.rb's file_prompt).
398
+ private def build_parser_failure_result(path, ref)
399
+ ext = File.extname(path).downcase
400
+ file_size = File.size(path) rescue 0
401
+ message_lines = ["Failed to extract text from #{ext.empty? ? 'file' : ext[1..-1].upcase}."]
402
+ message_lines << "Parser error: #{ref.parse_error}" if ref.parse_error
403
+ if ref.parser_path
404
+ expected_preview = "#{path}.preview.md"
405
+ message_lines << "Parser script: #{ref.parser_path}"
406
+ message_lines << "To fix: edit the parser, then run: ruby #{ref.parser_path} #{path} > #{expected_preview}"
407
+ message_lines << "After a successful parse, re-run file_reader on this file."
408
+ end
409
+ {
410
+ path: path,
411
+ binary: true,
412
+ format: ext.empty? ? "unknown" : ext[1..-1],
413
+ size_bytes: file_size,
414
+ content: nil,
415
+ parser_path: ref.parser_path,
416
+ parse_error: ref.parse_error,
417
+ error: message_lines.join("\n")
418
+ }
419
+ end
420
+
316
421
  private def detect_mime_type(path, data)
317
422
  Utils::FileProcessor.detect_mime_type(path, data)
318
423
  end