openclacky 1.0.0 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +39 -0
- data/README.md +87 -53
- data/lib/clacky/agent/cost_tracker.rb +19 -2
- data/lib/clacky/agent/llm_caller.rb +218 -0
- data/lib/clacky/agent/message_compressor_helper.rb +32 -2
- data/lib/clacky/agent.rb +54 -22
- data/lib/clacky/client.rb +44 -5
- data/lib/clacky/default_parsers/pdf_parser.rb +58 -17
- data/lib/clacky/default_parsers/pdf_parser_ocr.py +103 -0
- data/lib/clacky/default_parsers/pdf_parser_plumber.py +62 -0
- data/lib/clacky/default_skills/deploy/SKILL.md +201 -77
- data/lib/clacky/default_skills/new/SKILL.md +3 -114
- data/lib/clacky/default_skills/onboard/SKILL.md +349 -133
- data/lib/clacky/default_skills/onboard/scripts/import_external_skills.rb +371 -0
- data/lib/clacky/default_skills/onboard/scripts/install_builtin_skills.rb +175 -0
- data/lib/clacky/default_skills/skill-add/scripts/install_from_zip.rb +59 -26
- data/lib/clacky/message_format/anthropic.rb +72 -8
- data/lib/clacky/message_format/bedrock.rb +6 -3
- data/lib/clacky/providers.rb +146 -3
- data/lib/clacky/server/channel/adapters/feishu/adapter.rb +14 -0
- data/lib/clacky/server/channel/adapters/feishu/bot.rb +10 -0
- data/lib/clacky/server/channel/adapters/feishu/message_parser.rb +1 -0
- data/lib/clacky/server/channel/channel_manager.rb +12 -4
- data/lib/clacky/server/channel/channel_ui_controller.rb +8 -2
- data/lib/clacky/server/http_server.rb +746 -13
- data/lib/clacky/server/session_registry.rb +55 -24
- data/lib/clacky/skill.rb +10 -9
- data/lib/clacky/skill_loader.rb +23 -11
- data/lib/clacky/tools/file_reader.rb +232 -127
- data/lib/clacky/tools/security.rb +42 -64
- data/lib/clacky/tools/terminal/persistent_session.rb +15 -4
- data/lib/clacky/tools/terminal/safe_rm.sh +106 -0
- data/lib/clacky/tools/terminal/session_manager.rb +8 -3
- data/lib/clacky/tools/terminal.rb +263 -16
- data/lib/clacky/ui2/layout_manager.rb +8 -1
- data/lib/clacky/ui2/output_buffer.rb +83 -23
- data/lib/clacky/ui2/ui_controller.rb +74 -7
- data/lib/clacky/utils/file_processor.rb +14 -40
- data/lib/clacky/utils/model_pricing.rb +215 -0
- data/lib/clacky/utils/parser_manager.rb +70 -6
- data/lib/clacky/utils/string_matcher.rb +23 -1
- data/lib/clacky/version.rb +1 -1
- data/lib/clacky/web/app.css +673 -9
- data/lib/clacky/web/app.js +40 -1608
- data/lib/clacky/web/i18n.js +209 -0
- data/lib/clacky/web/index.html +166 -2
- data/lib/clacky/web/onboard.js +77 -1
- data/lib/clacky/web/profile.js +442 -0
- data/lib/clacky/web/sessions.js +1034 -2
- data/lib/clacky/web/settings.js +127 -6
- data/lib/clacky/web/sidebar.js +39 -0
- data/lib/clacky/web/skills.js +460 -0
- data/lib/clacky/web/trash.js +343 -0
- data/lib/clacky/web/ws-dispatcher.js +255 -0
- data/lib/clacky.rb +5 -3
- metadata +16 -17
- data/lib/clacky/clacky_auth_client.rb +0 -152
- data/lib/clacky/clacky_cloud_config.rb +0 -123
- data/lib/clacky/cloud_project_client.rb +0 -169
- data/lib/clacky/default_skills/deploy/scripts/rails_deploy.rb +0 -1377
- data/lib/clacky/default_skills/deploy/tools/check_health.rb +0 -116
- data/lib/clacky/default_skills/deploy/tools/create_database_service.rb +0 -341
- data/lib/clacky/default_skills/deploy/tools/execute_deployment.rb +0 -99
- data/lib/clacky/default_skills/deploy/tools/fetch_runtime_logs.rb +0 -77
- data/lib/clacky/default_skills/deploy/tools/list_services.rb +0 -67
- data/lib/clacky/default_skills/deploy/tools/report_deploy_status.rb +0 -67
- data/lib/clacky/default_skills/deploy/tools/set_deploy_variables.rb +0 -189
- data/lib/clacky/default_skills/new/scripts/cloud_project_init.sh +0 -74
- data/lib/clacky/deploy_api_client.rb +0 -484
|
@@ -218,31 +218,62 @@ module Clacky
|
|
|
218
218
|
|
|
219
219
|
ordered = pinned_section + non_pinned
|
|
220
220
|
|
|
221
|
-
ordered.map
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
221
|
+
ordered.map { |s| build_enriched_row(s, live[s[:session_id]]) }
|
|
222
|
+
end
|
|
223
|
+
|
|
224
|
+
# Return the same enriched hash that a `list` row would produce, for a
|
|
225
|
+
# single session — merging on-disk fields with in-memory live fields.
|
|
226
|
+
# Returns nil if the session is unknown on disk.
|
|
227
|
+
#
|
|
228
|
+
# This is the targeted, O(1) counterpart to `list` used by the WS layer
|
|
229
|
+
# when it only needs one row (e.g. pushing a fresh snapshot to a client
|
|
230
|
+
# that just (re)subscribed, or broadcasting a status-change update).
|
|
231
|
+
def snapshot(session_id)
|
|
232
|
+
return nil unless @session_manager
|
|
233
|
+
disk = @session_manager.load(session_id)
|
|
234
|
+
return nil unless disk
|
|
235
|
+
|
|
236
|
+
live = @mutex.synchronize do
|
|
237
|
+
s = @sessions[session_id]
|
|
238
|
+
next nil unless s
|
|
239
|
+
model_info = s[:agent]&.current_model_info
|
|
240
|
+
live_name = s[:agent]&.name
|
|
241
|
+
live_name = nil if live_name&.empty?
|
|
242
|
+
{ status: s[:status], error: s[:error], model: model_info&.dig(:model),
|
|
243
|
+
name: live_name, total_tasks: s[:agent]&.total_tasks,
|
|
244
|
+
total_cost: s[:agent]&.total_cost, cost_source: s[:agent]&.cost_source,
|
|
245
|
+
latest_latency: s[:agent]&.latest_latency }
|
|
245
246
|
end
|
|
247
|
+
|
|
248
|
+
build_enriched_row(disk, live)
|
|
249
|
+
end
|
|
250
|
+
|
|
251
|
+
# Merge a single disk-side session hash with the corresponding live
|
|
252
|
+
# in-memory agent fields (may be nil) into the row shape the frontend
|
|
253
|
+
# consumes.
|
|
254
|
+
private def build_enriched_row(s, ls)
|
|
255
|
+
id = s[:session_id]
|
|
256
|
+
{
|
|
257
|
+
id: id,
|
|
258
|
+
name: ls&.dig(:name) || s[:name] || "",
|
|
259
|
+
status: ls ? ls[:status].to_s : "idle",
|
|
260
|
+
error: ls ? ls[:error] : nil,
|
|
261
|
+
model: ls&.dig(:model),
|
|
262
|
+
source: s_source(s),
|
|
263
|
+
agent_profile: (s[:agent_profile] || "general").to_s,
|
|
264
|
+
working_dir: s[:working_dir],
|
|
265
|
+
created_at: s[:created_at],
|
|
266
|
+
updated_at: s[:updated_at],
|
|
267
|
+
total_tasks: ls&.dig(:total_tasks) || s.dig(:stats, :total_tasks) || 0,
|
|
268
|
+
total_cost: ls&.dig(:total_cost) || s.dig(:stats, :total_cost_usd) || 0.0,
|
|
269
|
+
cost_source: (ls&.dig(:cost_source) || s.dig(:stats, :cost_source) || "estimated").to_s,
|
|
270
|
+
# latest_latency is in-memory only (live sessions) — not persisted
|
|
271
|
+
# at the session-level on disk. The on-disk source of truth is
|
|
272
|
+
# per-assistant-message `latency` fields in messages[]. Reloaded
|
|
273
|
+
# sessions start with nil and get populated on the next LLM call.
|
|
274
|
+
latest_latency: ls&.dig(:latest_latency),
|
|
275
|
+
pinned: s[:pinned] || false,
|
|
276
|
+
}
|
|
246
277
|
end
|
|
247
278
|
|
|
248
279
|
|
data/lib/clacky/skill.rb
CHANGED
|
@@ -514,19 +514,20 @@ module Clacky
|
|
|
514
514
|
@warnings << "Invalid name '#{@name}' in metadata; using directory name '#{dir_slug}' instead."
|
|
515
515
|
@name = dir_slug
|
|
516
516
|
else
|
|
517
|
-
#
|
|
518
|
-
|
|
519
|
-
@
|
|
520
|
-
|
|
521
|
-
@name =
|
|
517
|
+
# Both name and directory slug are invalid (e.g. they contain dots from a version suffix).
|
|
518
|
+
# Record a warning but keep the skill usable — do not mark as invalid.
|
|
519
|
+
@warnings << "Invalid skill name '#{@name}' and directory name '#{dir_slug}' is also not a valid slug. " \
|
|
520
|
+
"Expected lowercase letters, numbers, and hyphens (e.g. 'my-skill')."
|
|
521
|
+
@name = dir_slug
|
|
522
522
|
end
|
|
523
523
|
end
|
|
524
524
|
else
|
|
525
|
-
# No name in frontmatter — check the directory slug itself
|
|
525
|
+
# No name in frontmatter — check the directory slug itself.
|
|
526
|
+
# Non-conforming names (e.g. version-suffixed dirs like "test-runner-1.0.0")
|
|
527
|
+
# are allowed with a warning rather than being rejected outright.
|
|
526
528
|
unless valid_slug.call(dir_slug)
|
|
527
|
-
@
|
|
528
|
-
|
|
529
|
-
"Expected lowercase letters, numbers, and hyphens (e.g. 'my-skill')."
|
|
529
|
+
@warnings << "Directory name '#{dir_slug}' is not a valid skill slug. " \
|
|
530
|
+
"Expected lowercase letters, numbers, and hyphens (e.g. 'my-skill')."
|
|
530
531
|
end
|
|
531
532
|
end
|
|
532
533
|
|
data/lib/clacky/skill_loader.rb
CHANGED
|
@@ -287,20 +287,32 @@ module Clacky
|
|
|
287
287
|
def load_skills_from_directory(dir, source_type)
|
|
288
288
|
return [] unless dir.exist?
|
|
289
289
|
|
|
290
|
+
source_path = case source_type
|
|
291
|
+
when :global_clacky
|
|
292
|
+
Pathname.new(ENV.fetch("HOME", "~")).join(".clacky")
|
|
293
|
+
when :project_clacky
|
|
294
|
+
Pathname.new(@working_dir)
|
|
295
|
+
else
|
|
296
|
+
dir
|
|
297
|
+
end
|
|
298
|
+
|
|
290
299
|
skills = []
|
|
291
|
-
dir.children.select(&:directory?).each do |
|
|
292
|
-
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
Pathname.new(@working_dir)
|
|
300
|
+
dir.children.select(&:directory?).each do |entry|
|
|
301
|
+
if entry.join("SKILL.md").exist?
|
|
302
|
+
# Direct skill directory
|
|
303
|
+
skill = load_single_skill(entry, source_path, entry.basename.to_s, source_type)
|
|
304
|
+
skills << skill if skill
|
|
297
305
|
else
|
|
298
|
-
|
|
306
|
+
# Treat as a category directory — scan one level deeper for skills.
|
|
307
|
+
# This allows grouping skills under ~/.clacky/skills/<category>/<skill>/SKILL.md
|
|
308
|
+
# (e.g. openclaw-imports/my-skill/SKILL.md) without changing the loader contract.
|
|
309
|
+
entry.children.select(&:directory?).each do |skill_dir|
|
|
310
|
+
next unless skill_dir.join("SKILL.md").exist?
|
|
311
|
+
|
|
312
|
+
skill = load_single_skill(skill_dir, source_path, skill_dir.basename.to_s, source_type)
|
|
313
|
+
skills << skill if skill
|
|
314
|
+
end
|
|
299
315
|
end
|
|
300
|
-
|
|
301
|
-
skill_name = skill_dir.basename.to_s
|
|
302
|
-
skill = load_single_skill(skill_dir, source_path, skill_name, source_type)
|
|
303
|
-
skills << skill if skill
|
|
304
316
|
end
|
|
305
317
|
skills
|
|
306
318
|
end
|
|
@@ -7,7 +7,7 @@ module Clacky
|
|
|
7
7
|
module Tools
|
|
8
8
|
class FileReader < Base
|
|
9
9
|
self.tool_name = "file_reader"
|
|
10
|
-
self.tool_description = "Read contents of a file from the filesystem. Supports text files, images (PNG/JPG/GIF/WEBP), and documents (PDF/DOCX/XLSX/PPTX)."
|
|
10
|
+
self.tool_description = "Read contents of a file from the filesystem. Supports text files, images (PNG/JPG/GIF/WEBP), and documents (PDF/DOCX/XLSX/PPTX — auto-converted to text via parsers, with OCR fallback for scanned PDFs)."
|
|
11
11
|
self.tool_category = "file_system"
|
|
12
12
|
self.tool_parameters = {
|
|
13
13
|
type: "object",
|
|
@@ -39,7 +39,7 @@ module Clacky
|
|
|
39
39
|
MAX_TEXT_FILE_SIZE = 1 * 1024 * 1024
|
|
40
40
|
|
|
41
41
|
# Maximum content size to return: 60,000 characters (~15,000 tokens at ~4 characters per token)
|
|
42
|
-
MAX_CONTENT_CHARS =
|
|
42
|
+
MAX_CONTENT_CHARS = 60_000
|
|
43
43
|
|
|
44
44
|
# Maximum characters per line (prevent single huge lines from bloating tokens)
|
|
45
45
|
MAX_LINE_CHARS = 1000
|
|
@@ -70,103 +70,178 @@ module Clacky
|
|
|
70
70
|
end
|
|
71
71
|
|
|
72
72
|
begin
|
|
73
|
-
#
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
73
|
+
# Delegate to FileProcessor for file type dispatch. FileProcessor is
|
|
74
|
+
# the single source of truth for how a file becomes a readable form
|
|
75
|
+
# (parser-extracted text, image base64, archive listing, plain text).
|
|
76
|
+
# FileReader here only shapes the result for the LLM.
|
|
77
|
+
ref = Utils::FileProcessor.process_path(expanded_path)
|
|
78
|
+
|
|
79
|
+
case ref.type
|
|
80
|
+
when :image
|
|
81
|
+
# Images go to LLM as base64 via the image_inject sidecar channel.
|
|
82
|
+
return handle_image_file(expanded_path)
|
|
83
|
+
|
|
84
|
+
when :pdf, :document, :spreadsheet, :presentation
|
|
85
|
+
# Parser-backed document formats. FileProcessor has already
|
|
86
|
+
# produced a preview markdown file (or set parse_error on failure).
|
|
87
|
+
if ref.preview_path && File.exist?(ref.preview_path)
|
|
88
|
+
return read_text_file(
|
|
89
|
+
expanded_path,
|
|
90
|
+
max_lines: max_lines,
|
|
91
|
+
start_line: start_line,
|
|
92
|
+
end_line: end_line,
|
|
93
|
+
source_path: ref.preview_path,
|
|
94
|
+
parsed_from: ref.type
|
|
95
|
+
)
|
|
96
|
+
else
|
|
97
|
+
return build_parser_failure_result(expanded_path, ref)
|
|
98
|
+
end
|
|
77
99
|
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
100
|
+
when :text, :csv, :zip
|
|
101
|
+
# FileProcessor already produced a preview (raw text copy for
|
|
102
|
+
# text/csv, archive listing for zip/tar). Read the preview with
|
|
103
|
+
# normal line-range + truncation rules.
|
|
104
|
+
source = (ref.preview_path && File.exist?(ref.preview_path)) ? ref.preview_path : expanded_path
|
|
105
|
+
return read_text_file(
|
|
106
|
+
expanded_path,
|
|
107
|
+
max_lines: max_lines,
|
|
108
|
+
start_line: start_line,
|
|
109
|
+
end_line: end_line,
|
|
110
|
+
source_path: source
|
|
111
|
+
)
|
|
88
112
|
|
|
89
|
-
# Read text file with optional line range.
|
|
90
|
-
# Scrub invalid UTF-8 bytes (e.g. GBK-encoded files) so downstream
|
|
91
|
-
# JSON.generate / history persistence won't blow up later.
|
|
92
|
-
all_lines = File.readlines(expanded_path).map! { |line| safe_utf8(line) }
|
|
93
|
-
total_lines = all_lines.size
|
|
94
|
-
|
|
95
|
-
# Calculate start index (convert 1-indexed to 0-indexed)
|
|
96
|
-
start_idx = start_line ? [start_line - 1, 0].max : 0
|
|
97
|
-
|
|
98
|
-
# Calculate end index based on parameters
|
|
99
|
-
if end_line
|
|
100
|
-
# User specified end_line directly
|
|
101
|
-
end_idx = [end_line - 1, total_lines - 1].min
|
|
102
|
-
elsif start_line
|
|
103
|
-
# start_line + max_lines - 1 (relative to start_line, inclusive)
|
|
104
|
-
calculated_end_line = start_line + max_lines - 1
|
|
105
|
-
end_idx = [calculated_end_line - 1, total_lines - 1].min
|
|
106
113
|
else
|
|
107
|
-
#
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
return {
|
|
114
|
-
path: expanded_path,
|
|
115
|
-
content: nil,
|
|
116
|
-
lines_read: 0,
|
|
117
|
-
error: "Invalid line range: start_line #{start_line} exceeds total lines (#{total_lines})"
|
|
118
|
-
}
|
|
119
|
-
end
|
|
120
|
-
|
|
121
|
-
# Validate range
|
|
122
|
-
if start_idx > end_idx
|
|
123
|
-
return {
|
|
124
|
-
path: expanded_path,
|
|
125
|
-
content: nil,
|
|
126
|
-
lines_read: 0,
|
|
127
|
-
error: "Invalid line range: start_line #{start_line} > end_line #{end_line || (start_line + max_lines)}"
|
|
128
|
-
}
|
|
129
|
-
end
|
|
130
|
-
|
|
131
|
-
lines = all_lines[start_idx..end_idx] || []
|
|
132
|
-
|
|
133
|
-
# Truncate individual lines that are too long
|
|
134
|
-
lines = lines.map do |line|
|
|
135
|
-
if line.length > MAX_LINE_CHARS
|
|
136
|
-
line[0...MAX_LINE_CHARS] + "... [Line truncated - #{line.length} chars]\n"
|
|
137
|
-
else
|
|
138
|
-
line
|
|
114
|
+
# Unknown / :file — could be an unrecognised source file, a binary
|
|
115
|
+
# blob, or anything else. Fall back to:
|
|
116
|
+
# 1. If FileProcessor.binary_file_path? says it's binary → report unsupported.
|
|
117
|
+
# 2. Otherwise → read as plain text (covers .rb, .py, .js, .log, etc.).
|
|
118
|
+
if Utils::FileProcessor.binary_file_path?(expanded_path)
|
|
119
|
+
return handle_unsupported_binary(expanded_path, ref)
|
|
139
120
|
end
|
|
140
|
-
end
|
|
141
|
-
|
|
142
|
-
content = lines.join
|
|
143
|
-
truncated = end_idx < (total_lines - 1)
|
|
144
121
|
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
122
|
+
return read_text_file(
|
|
123
|
+
expanded_path,
|
|
124
|
+
max_lines: max_lines,
|
|
125
|
+
start_line: start_line,
|
|
126
|
+
end_line: end_line
|
|
127
|
+
)
|
|
151
128
|
end
|
|
152
|
-
|
|
129
|
+
rescue StandardError => e
|
|
153
130
|
{
|
|
154
131
|
path: expanded_path,
|
|
155
|
-
content:
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
132
|
+
content: nil,
|
|
133
|
+
error: "Error reading file: #{e.message}"
|
|
134
|
+
}
|
|
135
|
+
end
|
|
136
|
+
end
|
|
137
|
+
|
|
138
|
+
# Read a plain-text file with line-range selection and token-budget
|
|
139
|
+
# truncation. The source of the text can be:
|
|
140
|
+
# - the original file itself (source_path == expanded_path)
|
|
141
|
+
# - a parser-generated preview.md for documents (source_path = ref.preview_path)
|
|
142
|
+
# The reported `path` is always the original file so the LLM sees a
|
|
143
|
+
# consistent identity.
|
|
144
|
+
private def read_text_file(display_path, max_lines:, start_line:, end_line:, source_path: nil, parsed_from: nil)
|
|
145
|
+
source_path ||= display_path
|
|
146
|
+
|
|
147
|
+
file_size = File.size(source_path)
|
|
148
|
+
if file_size > MAX_TEXT_FILE_SIZE
|
|
149
|
+
return {
|
|
150
|
+
path: display_path,
|
|
151
|
+
content: nil,
|
|
152
|
+
size_bytes: file_size,
|
|
153
|
+
error: "Text file too large: #{format_file_size(file_size)} (max: #{format_file_size(MAX_TEXT_FILE_SIZE)}). Please use grep tool to search within this file instead."
|
|
154
|
+
}
|
|
155
|
+
end
|
|
156
|
+
|
|
157
|
+
# Read text file with optional line range.
|
|
158
|
+
# Scrub invalid UTF-8 bytes (e.g. GBK-encoded files) so downstream
|
|
159
|
+
# JSON.generate / history persistence won't blow up later.
|
|
160
|
+
all_lines = File.readlines(source_path).map! { |line| safe_utf8(line) }
|
|
161
|
+
total_lines = all_lines.size
|
|
162
|
+
|
|
163
|
+
# Calculate start index (convert 1-indexed to 0-indexed)
|
|
164
|
+
start_idx = start_line ? [start_line - 1, 0].max : 0
|
|
165
|
+
|
|
166
|
+
# Calculate end index based on parameters
|
|
167
|
+
if end_line
|
|
168
|
+
end_idx = [end_line - 1, total_lines - 1].min
|
|
169
|
+
elsif start_line
|
|
170
|
+
calculated_end_line = start_line + max_lines - 1
|
|
171
|
+
end_idx = [calculated_end_line - 1, total_lines - 1].min
|
|
172
|
+
else
|
|
173
|
+
end_idx = [max_lines - 1, total_lines - 1].min
|
|
174
|
+
end
|
|
175
|
+
|
|
176
|
+
if total_lines == 0
|
|
177
|
+
return {
|
|
178
|
+
path: display_path,
|
|
179
|
+
content: "",
|
|
180
|
+
lines_read: 0,
|
|
181
|
+
total_lines: 0,
|
|
182
|
+
truncated: false,
|
|
159
183
|
start_line: start_line,
|
|
160
184
|
end_line: end_line,
|
|
185
|
+
parsed_from: parsed_from&.to_s,
|
|
186
|
+
source_path: (source_path != display_path ? source_path : nil),
|
|
161
187
|
error: nil
|
|
162
188
|
}
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
189
|
+
end
|
|
190
|
+
|
|
191
|
+
# Check if start_line exceeds file length first
|
|
192
|
+
if start_idx >= total_lines
|
|
193
|
+
return {
|
|
194
|
+
path: display_path,
|
|
166
195
|
content: nil,
|
|
167
|
-
|
|
196
|
+
lines_read: 0,
|
|
197
|
+
error: "Invalid line range: start_line #{start_line} exceeds total lines (#{total_lines})"
|
|
198
|
+
}
|
|
199
|
+
end
|
|
200
|
+
|
|
201
|
+
# Validate range
|
|
202
|
+
if start_idx > end_idx
|
|
203
|
+
return {
|
|
204
|
+
path: display_path,
|
|
205
|
+
content: nil,
|
|
206
|
+
lines_read: 0,
|
|
207
|
+
error: "Invalid line range: start_line #{start_line} > end_line #{end_line || (start_line + max_lines)}"
|
|
168
208
|
}
|
|
169
209
|
end
|
|
210
|
+
|
|
211
|
+
lines = all_lines[start_idx..end_idx] || []
|
|
212
|
+
|
|
213
|
+
# Truncate individual lines that are too long
|
|
214
|
+
lines = lines.map do |line|
|
|
215
|
+
if line.length > MAX_LINE_CHARS
|
|
216
|
+
line[0...MAX_LINE_CHARS] + "... [Line truncated - #{line.length} chars]\n"
|
|
217
|
+
else
|
|
218
|
+
line
|
|
219
|
+
end
|
|
220
|
+
end
|
|
221
|
+
|
|
222
|
+
content = lines.join
|
|
223
|
+
truncated = end_idx < (total_lines - 1)
|
|
224
|
+
|
|
225
|
+
# Truncate total content if it exceeds maximum size
|
|
226
|
+
if content.length > MAX_CONTENT_CHARS
|
|
227
|
+
content = content[0...MAX_CONTENT_CHARS] +
|
|
228
|
+
"\n\n[Content truncated - exceeded #{MAX_CONTENT_CHARS} characters (~10,000 tokens)]" +
|
|
229
|
+
"\nUse start_line/end_line parameters to read specific sections, or grep tool to search for keywords."
|
|
230
|
+
truncated = true
|
|
231
|
+
end
|
|
232
|
+
|
|
233
|
+
{
|
|
234
|
+
path: display_path,
|
|
235
|
+
content: content,
|
|
236
|
+
lines_read: lines.size,
|
|
237
|
+
total_lines: total_lines,
|
|
238
|
+
truncated: truncated,
|
|
239
|
+
start_line: start_line,
|
|
240
|
+
end_line: end_line,
|
|
241
|
+
parsed_from: parsed_from&.to_s,
|
|
242
|
+
source_path: (source_path != display_path ? source_path : nil),
|
|
243
|
+
error: nil
|
|
244
|
+
}
|
|
170
245
|
end
|
|
171
246
|
|
|
172
247
|
def format_call(args)
|
|
@@ -199,21 +274,22 @@ module Clacky
|
|
|
199
274
|
end
|
|
200
275
|
end
|
|
201
276
|
|
|
202
|
-
# Handle text file reading
|
|
277
|
+
# Handle text file reading (including parser-extracted documents)
|
|
203
278
|
lines = result[:lines_read] || result['lines_read'] || 0
|
|
204
279
|
truncated = result[:truncated] || result['truncated']
|
|
205
|
-
|
|
280
|
+
parsed_from = result[:parsed_from] || result['parsed_from']
|
|
281
|
+
suffix = parsed_from ? " (from #{parsed_from})" : ""
|
|
282
|
+
"Read #{lines} lines#{suffix}#{truncated ? ' (truncated)' : ''}"
|
|
206
283
|
end
|
|
207
284
|
|
|
208
|
-
# Format result for LLM - handles both text and binary (image
|
|
285
|
+
# Format result for LLM - handles both text and binary (image) content
|
|
209
286
|
# This method is called by the agent to format tool results before sending to LLM
|
|
210
287
|
def format_result_for_llm(result)
|
|
211
|
-
# For LLM-compatible binary files with base64 data
|
|
288
|
+
# For LLM-compatible binary files with base64 data (images only — documents
|
|
289
|
+
# are converted to text upstream via FileProcessor parsers).
|
|
212
290
|
if result[:binary] && result[:base64_data]
|
|
213
|
-
# Create a text description
|
|
214
291
|
description = "File: #{result[:path]}\nType: #{result[:format]}\nSize: #{format_file_size(result[:size_bytes])}"
|
|
215
292
|
|
|
216
|
-
# Add size warning for large files
|
|
217
293
|
if result[:size_bytes] > Utils::FileProcessor::MAX_FILE_SIZE
|
|
218
294
|
description += "\nWARNING: Large file (>#{Utils::FileProcessor::MAX_FILE_SIZE / 1024}KB) - may consume significant tokens"
|
|
219
295
|
end
|
|
@@ -229,8 +305,6 @@ module Clacky
|
|
|
229
305
|
#
|
|
230
306
|
# The agent detects `:image_inject` in the tool result after observe() and
|
|
231
307
|
# appends a `role: "user"` system_injected message containing the image block.
|
|
232
|
-
# This matches the standard workaround used by OpenAI's own agent SDK and
|
|
233
|
-
# pydantic-ai for multimodal tool outputs.
|
|
234
308
|
if result[:mime_type]&.start_with?("image/")
|
|
235
309
|
return {
|
|
236
310
|
type: "text",
|
|
@@ -243,14 +317,14 @@ module Clacky
|
|
|
243
317
|
}
|
|
244
318
|
end
|
|
245
319
|
|
|
246
|
-
#
|
|
320
|
+
# No non-image binary type should reach here anymore — documents now
|
|
321
|
+
# go through the parser + text path. Keep this as a defensive fallback.
|
|
247
322
|
return {
|
|
248
323
|
type: "document",
|
|
249
324
|
path: result[:path],
|
|
250
325
|
format: result[:format],
|
|
251
326
|
size_bytes: result[:size_bytes],
|
|
252
327
|
mime_type: result[:mime_type],
|
|
253
|
-
base64_data: result[:base64_data],
|
|
254
328
|
description: description
|
|
255
329
|
}
|
|
256
330
|
end
|
|
@@ -261,45 +335,35 @@ module Clacky
|
|
|
261
335
|
# For directory listings, return as-is (no raw file content to preserve)
|
|
262
336
|
return result if result[:is_directory]
|
|
263
337
|
|
|
264
|
-
# For plain text files: return a plain
|
|
265
|
-
# directly to the LLM without JSON-encoding
|
|
338
|
+
# For plain text files (and parser-extracted documents): return a plain
|
|
339
|
+
# string so the agent sends it directly to the LLM without JSON-encoding
|
|
340
|
+
# (avoids \" / \n escaping).
|
|
266
341
|
header = "File: #{result[:path]}"
|
|
342
|
+
if result[:parsed_from]
|
|
343
|
+
header += " [extracted from #{result[:parsed_from]}]"
|
|
344
|
+
end
|
|
267
345
|
header += " (lines #{result[:start_line]}-#{result[:end_line]})" if result[:start_line]
|
|
268
346
|
header += " [#{result[:lines_read]}/#{result[:total_lines]} lines]"
|
|
269
347
|
header += " [TRUNCATED]" if result[:truncated]
|
|
270
348
|
"#{header}\n\n#{result[:content]}"
|
|
271
349
|
end
|
|
272
350
|
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
289
|
-
# File too large or other error
|
|
290
|
-
file_size = File.size(path)
|
|
291
|
-
ext = File.extname(path).downcase
|
|
292
|
-
{
|
|
293
|
-
path: path,
|
|
294
|
-
binary: true,
|
|
295
|
-
format: ext.empty? ? "unknown" : ext[1..-1],
|
|
296
|
-
size_bytes: file_size,
|
|
297
|
-
content: nil,
|
|
298
|
-
error: e.message
|
|
299
|
-
}
|
|
300
|
-
end
|
|
301
|
-
else
|
|
302
|
-
# Binary file that we can't send to LLM
|
|
351
|
+
# Handle an image file: convert to base64 and return an LLM-ready result
|
|
352
|
+
# with the image_inject sidecar. Used by execute() for :image type files.
|
|
353
|
+
private def handle_image_file(path)
|
|
354
|
+
begin
|
|
355
|
+
result = Utils::FileProcessor.file_to_base64(path)
|
|
356
|
+
{
|
|
357
|
+
path: path,
|
|
358
|
+
binary: true,
|
|
359
|
+
format: result[:format],
|
|
360
|
+
mime_type: result[:mime_type],
|
|
361
|
+
size_bytes: result[:size_bytes],
|
|
362
|
+
base64_data: result[:base64_data],
|
|
363
|
+
error: nil
|
|
364
|
+
}
|
|
365
|
+
rescue ArgumentError => e
|
|
366
|
+
# File too large or unreadable
|
|
303
367
|
file_size = File.size(path)
|
|
304
368
|
ext = File.extname(path).downcase
|
|
305
369
|
{
|
|
@@ -308,11 +372,52 @@ module Clacky
|
|
|
308
372
|
format: ext.empty? ? "unknown" : ext[1..-1],
|
|
309
373
|
size_bytes: file_size,
|
|
310
374
|
content: nil,
|
|
311
|
-
error:
|
|
375
|
+
error: e.message
|
|
312
376
|
}
|
|
313
377
|
end
|
|
314
378
|
end
|
|
315
379
|
|
|
380
|
+
# Handle an unsupported binary file (no parser available, not an image).
|
|
381
|
+
# Returns a clear error message so the LLM knows it needs a different approach.
|
|
382
|
+
private def handle_unsupported_binary(path, ref = nil)
|
|
383
|
+
file_size = File.size(path)
|
|
384
|
+
ext = File.extname(path).downcase
|
|
385
|
+
{
|
|
386
|
+
path: path,
|
|
387
|
+
binary: true,
|
|
388
|
+
format: ext.empty? ? "unknown" : ext[1..-1],
|
|
389
|
+
size_bytes: file_size,
|
|
390
|
+
content: nil,
|
|
391
|
+
error: "Binary file detected. This format cannot be read as text. File size: #{format_file_size(file_size)}"
|
|
392
|
+
}
|
|
393
|
+
end
|
|
394
|
+
|
|
395
|
+
# Build an error result when the parser for a supported document format
|
|
396
|
+
# failed. The LLM receives the parser path so it can fix and retry, matching
|
|
397
|
+
# the behaviour of the file-upload pipeline (agent.rb's file_prompt).
|
|
398
|
+
private def build_parser_failure_result(path, ref)
|
|
399
|
+
ext = File.extname(path).downcase
|
|
400
|
+
file_size = File.size(path) rescue 0
|
|
401
|
+
message_lines = ["Failed to extract text from #{ext.empty? ? 'file' : ext[1..-1].upcase}."]
|
|
402
|
+
message_lines << "Parser error: #{ref.parse_error}" if ref.parse_error
|
|
403
|
+
if ref.parser_path
|
|
404
|
+
expected_preview = "#{path}.preview.md"
|
|
405
|
+
message_lines << "Parser script: #{ref.parser_path}"
|
|
406
|
+
message_lines << "To fix: edit the parser, then run: ruby #{ref.parser_path} #{path} > #{expected_preview}"
|
|
407
|
+
message_lines << "After a successful parse, re-run file_reader on this file."
|
|
408
|
+
end
|
|
409
|
+
{
|
|
410
|
+
path: path,
|
|
411
|
+
binary: true,
|
|
412
|
+
format: ext.empty? ? "unknown" : ext[1..-1],
|
|
413
|
+
size_bytes: file_size,
|
|
414
|
+
content: nil,
|
|
415
|
+
parser_path: ref.parser_path,
|
|
416
|
+
parse_error: ref.parse_error,
|
|
417
|
+
error: message_lines.join("\n")
|
|
418
|
+
}
|
|
419
|
+
end
|
|
420
|
+
|
|
316
421
|
private def detect_mime_type(path, data)
|
|
317
422
|
Utils::FileProcessor.detect_mime_type(path, data)
|
|
318
423
|
end
|