openclacky 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,11 +6,10 @@ module Clacky
6
6
  module UI2
7
7
  # LayoutManager manages screen layout with split areas (output area on top, input area on bottom)
8
8
  class LayoutManager
9
- attr_reader :screen, :output_area, :input_area, :todo_area
9
+ attr_reader :screen, :input_area, :todo_area
10
10
 
11
- def initialize(output_area:, input_area:, todo_area: nil)
11
+ def initialize(input_area:, todo_area: nil)
12
12
  @screen = ScreenBuffer.new
13
- @output_area = output_area
14
13
  @input_area = input_area
15
14
  @todo_area = todo_area
16
15
  @render_mutex = Mutex.new
@@ -35,7 +34,6 @@ module Clacky
35
34
  @input_row = @todo_row + todo_height
36
35
 
37
36
  # Update component dimensions
38
- @output_area.height = @output_height
39
37
  @input_area.row = @input_row
40
38
  end
41
39
 
@@ -93,10 +91,13 @@ module Clacky
93
91
  def position_inline_input_cursor(inline_input)
94
92
  return unless inline_input
95
93
 
96
- # InlineInput renders its own visual cursor via render_line_with_cursor
97
- # (white background on cursor character), so we don't need terminal cursor.
98
- # Just hide the terminal cursor to avoid showing two cursors.
99
- screen.hide_cursor
94
+ # Calculate the actual terminal cursor position considering multi-byte characters
95
+ # InlineInput is on the last output line (@output_row - 1)
96
+ cursor_row = @output_row - 1
97
+ cursor_col = inline_input.cursor_col # This already considers display width
98
+
99
+ # Move terminal cursor to the correct position
100
+ screen.move_cursor(cursor_row, cursor_col)
100
101
  screen.flush
101
102
  end
102
103
 
@@ -159,14 +160,16 @@ module Clacky
159
160
  # Clear output area (for /clear command)
160
161
  def clear_output
161
162
  @render_mutex.synchronize do
162
- # Clear all lines in output area (from 0 to fixed_area_start - 1)
163
- max_output_row = fixed_area_start_row
164
- (0...max_output_row).each do |row|
163
+ # Clear all lines in output area (from 0 to where fixed area starts)
164
+ max_row = fixed_area_start_row
165
+ (0...max_row).each do |row|
165
166
  screen.move_cursor(row, 0)
166
167
  screen.clear_line
167
168
  end
168
- # Reset output row position to start
169
+
170
+ # Reset output position to beginning
169
171
  @output_row = 0
172
+
170
173
  # Re-render fixed areas to ensure they stay in place
171
174
  render_fixed_areas
172
175
  screen.flush
@@ -174,46 +177,24 @@ module Clacky
174
177
  end
175
178
 
176
179
  # Append content to output area
177
- # Track current row, scroll when reaching fixed area
178
- # @param content [String] Content to append
180
+ # This is the main output method - handles scrolling and fixed area preservation
181
+ # @param content [String] Content to append (can be multi-line)
179
182
  def append_output(content)
180
183
  return if content.nil?
181
184
 
182
185
  @render_mutex.synchronize do
183
- max_output_row = fixed_area_start_row - 1
184
-
185
- # Special handling for empty string - just add a blank line
186
- if content.empty?
187
- print "\n"
188
- @output_row += 1
189
- render_fixed_areas
190
- screen.flush
191
- return
192
- end
193
-
194
- content.split("\n").each do |line|
186
+ lines = content.split("\n", -1) # -1 to keep trailing empty strings
187
+
188
+ lines.each_with_index do |line, index|
195
189
  # Wrap long lines to prevent display issues
196
190
  wrapped_lines = wrap_long_line(line)
197
191
 
198
192
  wrapped_lines.each do |wrapped_line|
199
- # If at max row, need to scroll before outputting
200
- if @output_row > max_output_row
201
- # Move to bottom of screen and print newline to trigger scroll
202
- screen.move_cursor(screen.height - 1, 0)
203
- print "\n"
204
- # Stay at max_output_row for next output
205
- @output_row = max_output_row
206
- end
207
-
208
- # Output line at current position
209
- screen.move_cursor(@output_row, 0)
210
- screen.clear_line
211
- output_area.append(wrapped_line)
212
- @output_row += 1
193
+ write_output_line(wrapped_line)
213
194
  end
214
195
  end
215
196
 
216
- # Re-render fixed areas at screen bottom
197
+ # Re-render fixed areas to ensure they stay at bottom
217
198
  render_fixed_areas
218
199
  screen.flush
219
200
  end
@@ -223,40 +204,52 @@ module Clacky
223
204
  # @param content [String] Content to update
224
205
  def update_last_line(content)
225
206
  @render_mutex.synchronize do
226
- # Last output line is at @output_row - 1
227
- last_row = [@output_row - 1, 0].max
207
+ return if @output_row == 0 # No output yet
208
+
209
+ # Last written line is at @output_row - 1
210
+ last_row = @output_row - 1
228
211
  screen.move_cursor(last_row, 0)
229
212
  screen.clear_line
230
- output_area.append(content)
231
- render_fixed_areas
213
+ print content
214
+
215
+ # Hide terminal cursor to avoid showing two cursors
216
+ # InlineInput uses visual cursor (white background) which is better for multi-byte chars
217
+ screen.hide_cursor
232
218
  screen.flush
219
+
220
+ # Don't re-render fixed areas - we're just updating existing content
233
221
  end
234
222
  end
235
223
 
236
224
  # Remove the last line from output area
237
225
  def remove_last_line
238
226
  @render_mutex.synchronize do
239
- last_row = [@output_row - 1, 0].max
227
+ return if @output_row == 0 # No output to remove
228
+
229
+ # Clear the last written line
230
+ last_row = @output_row - 1
240
231
  screen.move_cursor(last_row, 0)
241
232
  screen.clear_line
242
- @output_row = last_row if @output_row > 0
233
+
234
+ # Move output row back
235
+ @output_row = last_row
236
+
237
+ # Re-render fixed areas to ensure consistency
243
238
  render_fixed_areas
244
239
  screen.flush
245
240
  end
246
241
  end
247
242
 
248
- # Scroll output area up
243
+ # Scroll output area up (legacy no-op)
249
244
  # @param lines [Integer] Number of lines to scroll
250
245
  def scroll_output_up(lines = 1)
251
- output_area.scroll_up(lines)
252
- render_output
246
+ # No-op - terminal handles scrolling natively
253
247
  end
254
248
 
255
- # Scroll output area down
249
+ # Scroll output area down (legacy no-op)
256
250
  # @param lines [Integer] Number of lines to scroll
257
251
  def scroll_output_down(lines = 1)
258
- output_area.scroll_down(lines)
259
- render_output
252
+ # No-op - terminal handles scrolling natively
260
253
  end
261
254
 
262
255
  # Handle window resize
@@ -266,12 +259,14 @@ module Clacky
266
259
  screen.update_dimensions
267
260
  calculate_layout
268
261
 
269
- # Adjust output_row if it exceeds new max
270
- max_row = fixed_area_start_row - 1
271
- @output_row = [@output_row, max_row].min
262
+ # Adjust @output_row if it exceeds new layout
263
+ # After resize, @output_row should not exceed fixed_area_start_row
264
+ max_allowed = fixed_area_start_row
265
+ @output_row = [@output_row, max_allowed].min
272
266
 
273
- # Clear old fixed area lines
274
- ([old_gap_row, 0].max...screen.height).each do |row|
267
+ # Clear old fixed area and some lines above (terminal may have wrapped content)
268
+ clear_start = [old_gap_row - 5, 0].max
269
+ (clear_start...screen.height).each do |row|
275
270
  screen.move_cursor(row, 0)
276
271
  screen.clear_line
277
272
  end
@@ -282,6 +277,35 @@ module Clacky
282
277
 
283
278
  private
284
279
 
280
# Emit one already-wrapped line at the current output row.
#
# When the current row has reached the row returned by
# fixed_area_start_row, a newline is printed on the last screen row,
# the output row is pulled back to the row just above the fixed area,
# and the fixed areas are re-rendered before the line is written.
#
# @param line [String] Single line to write (should not contain newlines)
# @return [Integer] The output row the next write will use
def write_output_line(line)
  boundary = fixed_area_start_row

  unless @output_row < boundary
    # Printing a newline on the bottom row is what triggers the scroll.
    screen.move_cursor(screen.height - 1, 0)
    print "\n"

    @output_row = boundary - 1

    # Fixed areas are redrawn right after the scroll.
    render_fixed_areas
  end

  screen.move_cursor(@output_row, 0)
  screen.clear_line
  print line

  # Advance so the next call writes one row further down.
  @output_row += 1
end
308
+
285
309
  # Wrap a long line into multiple lines based on terminal width
286
310
  # Considers display width of multi-byte characters (e.g., Chinese characters)
287
311
  # @param line [String] Line to wrap
@@ -412,7 +436,7 @@ module Clacky
412
436
 
413
437
  # Internal render all (without mutex)
414
438
  def render_all_internal
415
- output_area.render(start_row: 0)
439
+ # Output flows naturally, just render fixed areas
416
440
  render_fixed_areas
417
441
  screen.flush
418
442
  end
@@ -29,12 +29,12 @@ module Clacky
29
29
  }.freeze
30
30
 
31
31
  COLORS = {
32
- user: [:bright_blue, :blue],
32
+ user: [:white, :white],
33
33
  assistant: [:bright_green, :white],
34
34
  tool_call: [:bright_cyan, :cyan],
35
35
  tool_result: [:cyan, :white],
36
36
  tool_denied: [:bright_yellow, :yellow],
37
- tool_planned: [:bright_blue, :blue],
37
+ tool_planned: [:bright_cyan, :cyan],
38
38
  tool_error: [:bright_red, :red],
39
39
  thinking: [:dim, :dim],
40
40
  working: [:bright_yellow, :yellow],
@@ -26,12 +26,12 @@ module Clacky
26
26
  }.freeze
27
27
 
28
28
  COLORS = {
29
- user: [:blue, :blue],
29
+ user: [:white, :white],
30
30
  assistant: [:green, :white],
31
31
  tool_call: [:cyan, :cyan],
32
32
  tool_result: [:white, :white],
33
33
  tool_denied: [:yellow, :yellow],
34
- tool_planned: [:blue, :blue],
34
+ tool_planned: [:cyan, :cyan],
35
35
  tool_error: [:red, :red],
36
36
  thinking: [:dim, :dim],
37
37
  working: [:bright_yellow, :yellow],
@@ -2,7 +2,6 @@
2
2
 
3
3
  require_relative "layout_manager"
4
4
  require_relative "view_renderer"
5
- require_relative "components/output_area"
6
5
  require_relative "components/input_area"
7
6
  require_relative "components/todo_area"
8
7
  require_relative "components/welcome_banner"
@@ -31,13 +30,11 @@ module Clacky
31
30
  }
32
31
 
33
32
  # Initialize layout components
34
- @output_area = Components::OutputArea.new(height: 20) # Will be recalculated
35
33
  @input_area = Components::InputArea.new
36
34
  @todo_area = Components::TodoArea.new
37
35
  @welcome_banner = Components::WelcomeBanner.new
38
36
  @inline_input = nil # Created when needed
39
37
  @layout = LayoutManager.new(
40
- output_area: @output_area,
41
38
  input_area: @input_area,
42
39
  todo_area: @todo_area
43
40
  )
@@ -211,19 +208,21 @@ module Clacky
211
208
  # - cost: cost for this iteration
212
209
  def show_token_usage(token_data)
213
210
  theme = ThemeManager.current_theme
211
+ pastel = Pastel.new
214
212
 
215
213
  token_info = []
216
214
 
217
- # Delta tokens with color coding
215
+ # Delta tokens with color coding (green/yellow/red + dim)
218
216
  delta_tokens = token_data[:delta_tokens]
219
217
  delta_str = "+#{delta_tokens}"
220
- colored_delta = if delta_tokens > 10000
221
- theme.format_text(delta_str, :error)
218
+ color_style = if delta_tokens > 10000
219
+ :red
222
220
  elsif delta_tokens > 5000
223
- theme.format_text(delta_str, :warning)
221
+ :yellow
224
222
  else
225
- theme.format_text(delta_str, :success)
223
+ :green
226
224
  end
225
+ colored_delta = pastel.decorate(delta_str, color_style, :dim)
227
226
  token_info << colored_delta
228
227
 
229
228
  # Cache status indicator (using theme)
@@ -231,31 +230,44 @@ module Clacky
231
230
  cache_read = token_data[:cache_read]
232
231
  cache_used = cache_read > 0 || cache_write > 0
233
232
  if cache_used
234
- token_info << theme.format_symbol(:cached)
233
+ token_info << pastel.dim(theme.symbol(:cached))
235
234
  end
236
235
 
237
236
  # Input tokens (with cache breakdown if available)
238
237
  prompt_tokens = token_data[:prompt_tokens]
239
238
  if cache_write > 0 || cache_read > 0
240
239
  input_detail = "#{prompt_tokens} (cache: #{cache_read} read, #{cache_write} write)"
241
- token_info << "Input: #{input_detail}"
240
+ token_info << pastel.dim("Input: #{input_detail}")
242
241
  else
243
- token_info << "Input: #{prompt_tokens}"
242
+ token_info << pastel.dim("Input: #{prompt_tokens}")
244
243
  end
245
244
 
246
245
  # Output tokens
247
- token_info << "Output: #{token_data[:completion_tokens]}"
246
+ token_info << pastel.dim("Output: #{token_data[:completion_tokens]}")
248
247
 
249
248
  # Total
250
- token_info << "Total: #{token_data[:total_tokens]}"
249
+ token_info << pastel.dim("Total: #{token_data[:total_tokens]}")
251
250
 
252
- # Cost for this iteration
251
+ # Cost for this iteration with color coding (red/yellow for high cost, dim for normal)
253
252
  if token_data[:cost]
254
- token_info << "Cost: $#{token_data[:cost].round(6)}"
253
+ cost = token_data[:cost]
254
+ cost_value = "$#{cost.round(6)}"
255
+ if cost >= 0.1
256
+ # High cost - red warning
257
+ colored_cost = pastel.decorate(cost_value, :red, :dim)
258
+ token_info << pastel.dim("Cost: ") + colored_cost
259
+ elsif cost >= 0.05
260
+ # Medium cost - yellow warning
261
+ colored_cost = pastel.decorate(cost_value, :yellow, :dim)
262
+ token_info << pastel.dim("Cost: ") + colored_cost
263
+ else
264
+ # Low cost - normal gray
265
+ token_info << pastel.dim("Cost: #{cost_value}")
266
+ end
255
267
  end
256
268
 
257
- # Display through output system
258
- token_display = theme.format_text(" [Tokens] #{token_info.join(' | ')}", :thinking)
269
+ # Display through output system (already all dimmed, just add prefix)
270
+ token_display = pastel.dim(" [Tokens] ") + token_info.join(pastel.dim(' | '))
259
271
  append_output(token_display)
260
272
  end
261
273
 
@@ -313,10 +325,29 @@ module Clacky
313
325
  # Show assistant message
314
326
  # @param content [String] Message content
315
327
  def show_assistant_message(content)
316
- output = @renderer.render_assistant_message(content)
328
+ # Filter out thinking tags from models like MiniMax M2.1 that use <think>...</think>
329
+ filtered_content = filter_thinking_tags(content)
330
+ return if filtered_content.nil? || filtered_content.strip.empty?
331
+
332
+ output = @renderer.render_assistant_message(filtered_content)
317
333
  append_output(output)
318
334
  end
319
335
 
336
# Strip model reasoning blocks from a message.
# Removes every <think>...</think> or <thinking>...</thinking> span
# (matched case-insensitively, across newlines), collapses runs of three
# or more newlines to a blank line, and trims surrounding whitespace.
# @param content [String, nil] Raw content from model
# @return [String, nil] Cleaned content, or nil when content is nil
def filter_thinking_tags(content)
  return nil if content.nil?

  reasoning_block = %r{<think(?:ing)?>\s*.*?\s*</think(?:ing)?>}mi

  content
    .gsub(reasoning_block, '')
    .gsub(/\n{3,}/, "\n\n")
    .strip
end
350
+
320
351
  # Show tool call
321
352
  # @param name [String] Tool name
322
353
  # @param args [String, Hash] Tool arguments (JSON string or Hash)
@@ -564,9 +595,38 @@ module Clacky
564
595
 
565
596
  diff = Diffy::Diff.new(old_content, new_content, context: 3)
566
597
  all_lines = diff.to_s(:color).lines
567
- display_lines = all_lines.first(max_lines)
598
+ plain_lines = diff.to_s.lines
599
+
600
+ # Add line numbers to diff output
601
+ old_line_num = 0
602
+ new_line_num = 0
603
+
604
+ numbered_lines = all_lines.each_with_index.map do |line, index|
605
+ # Use plain text to detect line type (remove ANSI codes)
606
+ plain_line = plain_lines[index]&.chomp || line.gsub(/\e\[[0-9;]*m/, '').chomp
607
+
608
+ # Remove trailing newline from colored line to avoid double newlines
609
+ colored_line = line.chomp
610
+
611
+ # Determine line type and number (use single line number for simplicity)
612
+ if plain_line.start_with?('+') || plain_line.start_with?('-') || plain_line.start_with?(' ')
613
+ new_line_num += 1
614
+ sprintf("%4d | %s", new_line_num, colored_line)
615
+ elsif plain_line.start_with?('@@')
616
+ # Diff header: extract line numbers from @@ -old_start,old_count +new_start,new_count @@
617
+ if plain_line =~ /@@ -(\d+)(?:,\d+)? (\d+)(?:,\d+)? @@/
618
+ new_line_num = $2.to_i - 1
619
+ end
620
+ sprintf("%4s | %s", "", colored_line)
621
+ else
622
+ # Other lines (headers, etc.)
623
+ sprintf("%4s | %s", "", colored_line)
624
+ end
625
+ end
626
+
627
+ display_lines = numbered_lines.first(max_lines)
628
+ display_lines.each { |line| append_output(line) }
568
629
 
569
- display_lines.each { |line| append_output(line.chomp) }
570
630
  if all_lines.size > max_lines
571
631
  append_output("\n... (#{all_lines.size - max_lines} more lines, diff truncated)")
572
632
  end
data/lib/clacky/ui2.rb CHANGED
@@ -10,7 +10,6 @@ require_relative "ui2/view_renderer"
10
10
  require_relative "ui2/ui_controller"
11
11
 
12
12
  require_relative "ui2/components/base_component"
13
- require_relative "ui2/components/output_area"
14
13
  require_relative "ui2/components/input_area"
15
14
  require_relative "ui2/components/message_component"
16
15
  require_relative "ui2/components/tool_component"
@@ -44,10 +44,11 @@ module Clacky
44
44
  result
45
45
  end
46
46
 
47
- # Validate required parameters
47
+ # Validate required parameters and filter unknown parameters
48
48
  def self.validate_required_params(call, args, tool_registry)
49
49
  tool = tool_registry.get(call[:name])
50
50
  required = tool.parameters&.dig(:required) || []
51
+ properties = tool.parameters&.dig(:properties) || {}
51
52
 
52
53
  missing = required.reject { |param|
53
54
  args.key?(param.to_sym) || args.key?(param.to_s)
@@ -57,7 +58,11 @@ module Clacky
57
58
  raise MissingRequiredParamsError.new(call[:name], missing, args.keys)
58
59
  end
59
60
 
60
- args
61
+ # Filter out unknown parameters to prevent errors when LLM sends extra arguments
62
+ known_params = properties.keys.map(&:to_sym) + properties.keys.map(&:to_s)
63
+ filtered_args = args.select { |key, _| known_params.include?(key) }
64
+
65
+ filtered_args
61
66
  end
62
67
 
63
68
  # Generate error message with tool definition
@@ -0,0 +1,201 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "base64"
4
+
5
module Clacky
  module Utils
    # File processing utilities for binary files, images, and PDFs.
    #
    # Every helper is a class-level method. The content-based helpers
    # (detect_format, detect_mime_type, binary_file?) compare raw bytes, so
    # they accept strings in any encoding, including UTF-8 text that contains
    # non-ASCII characters.
    class FileProcessor
      # Maximum file size for binary files (5MB)
      MAX_FILE_SIZE = 5 * 1024 * 1024

      # Supported image formats (extension => MIME type)
      IMAGE_FORMATS = {
        "png" => "image/png",
        "jpg" => "image/jpeg",
        "jpeg" => "image/jpeg",
        "gif" => "image/gif",
        "webp" => "image/webp"
      }.freeze

      # Supported document formats (extension => MIME type)
      DOCUMENT_FORMATS = {
        "pdf" => "application/pdf"
      }.freeze

      # All supported formats
      SUPPORTED_FORMATS = IMAGE_FORMATS.merge(DOCUMENT_FORMATS).freeze

      # File signatures (magic bytes) for format detection
      FILE_SIGNATURES = {
        "\x89PNG\r\n\x1a\n".b => "png",
        "\xFF\xD8\xFF".b => "jpg",
        "GIF87a".b => "gif",
        "GIF89a".b => "gif",
        "%PDF".b => "pdf"
      }.freeze

      # Number of leading bytes needed to test every signature above plus the
      # WebP marker, which sits at byte offsets 8..11 of a RIFF container.
      SIGNATURE_PROBE_SIZE = [FILE_SIGNATURES.keys.map(&:bytesize).max, 12].max

      # Control bytes that are normal in text: tab, line feed, carriage return.
      TEXT_CONTROL_BYTES = [9, 10, 13].freeze

      class << self
        # Convert image file path to base64 data URL
        # @param path [String] File path to image
        # @return [String] base64 data URL (e.g., "data:image/png;base64,...")
        # @raise [ArgumentError] If the file is missing, larger than
        #   MAX_FILE_SIZE, or not one of IMAGE_FORMATS
        def image_path_to_data_url(path)
          image_data, = read_within_limit(path, "Image file")

          # MIME type comes from the extension when it is a supported one,
          # otherwise from the file's leading bytes.
          mime_type = detect_mime_type(path, image_data)

          unless IMAGE_FORMATS.value?(mime_type)
            raise ArgumentError, "Unsupported image format: #{mime_type}"
          end

          "data:#{mime_type};base64,#{Base64.strict_encode64(image_data)}"
        end

        # Convert file to base64 with format detection
        # @param path [String] File path
        # @return [Hash] Hash with :format, :mime_type, :base64_data, :size_bytes
        #   (:format is nil and :mime_type is "application/octet-stream" when
        #   the format is not recognised)
        # @raise [ArgumentError] If file not found or too large
        def file_to_base64(path)
          file_data, file_size = read_within_limit(path, "File")

          {
            format: detect_format(path, file_data),
            mime_type: detect_mime_type(path, file_data),
            base64_data: Base64.strict_encode64(file_data),
            size_bytes: file_size
          }
        end

        # Detect file format from path and content
        #
        # A supported file extension wins; the content is only inspected when
        # the extension is missing or unsupported.
        # @param path [String] File path
        # @param data [String, nil] File data (any encoding; compared as bytes)
        # @return [String, nil] Format (e.g., "png", "jpg", "pdf") or nil
        def detect_format(path, data)
          ext = extension_of(path)
          return ext if SUPPORTED_FORMATS.key?(ext)

          # Compare as raw bytes: start_with? raises
          # Encoding::CompatibilityError when a UTF-8 string with non-ASCII
          # characters meets a binary signature containing high bytes.
          format_from_signature(data.to_s.byteslice(0, SIGNATURE_PROBE_SIZE).b)
        end

        # Detect MIME type from file path and content
        # @param path [String] File path
        # @param data [String, nil] File data
        # @return [String] MIME type (e.g., "image/png");
        #   "application/octet-stream" for unknown formats
        def detect_mime_type(path, data)
          SUPPORTED_FORMATS.fetch(detect_format(path, data), "application/octet-stream")
        end

        # Check if file is a supported binary format (by extension only)
        # @param path [String] File path
        # @return [Boolean] True if the file exists and has a supported extension
        def supported_binary_file?(path)
          File.exist?(path) && SUPPORTED_FORMATS.key?(extension_of(path))
        end

        # Check if file is an image (by extension only)
        # @param path [String] File path
        # @return [Boolean] True if the file exists and has an image extension
        def image_file?(path)
          File.exist?(path) && IMAGE_FORMATS.key?(extension_of(path))
        end

        # Check if file is a PDF (by extension only)
        # @param path [String] File path
        # @return [Boolean] True if the file exists and has a .pdf extension
        def pdf_file?(path)
          File.exist?(path) && extension_of(path) == "pdf"
        end

        # Check if file is binary (not text)
        #
        # Looks at the first sample_size bytes: a known signature or a NUL
        # byte means binary; otherwise the data is binary when more than 30%
        # of the sampled bytes are control bytes (other than tab/LF/CR) or
        # bytes >= 127.
        # @param data [String, nil] File content (any encoding)
        # @param sample_size [Integer] Number of bytes to check (default: 8192)
        # @return [Boolean] True if file appears to be binary
        def binary_file?(data, sample_size: 8192)
          # Work on a binary copy of the leading bytes so the slice, the
          # signature comparison and the ratio below are all byte-based and
          # independent of the string's encoding.
          sample = data.to_s.byteslice(0, sample_size).to_s.b
          return false if sample.empty?

          return true if format_from_signature(sample)
          return true if sample.include?("\x00")

          non_printable = sample.each_byte.count do |byte|
            (byte < 32 && !TEXT_CONTROL_BYTES.include?(byte)) || byte >= 127
          end

          (non_printable.to_f / sample.bytesize) > 0.3
        end

        private

        # Lower-cased file extension without the leading dot ("" when absent).
        def extension_of(path)
          File.extname(path).downcase.delete_prefix(".")
        end

        # Format implied by the leading magic bytes of +bytes+ (a binary
        # string), or nil when no known signature matches.
        def format_from_signature(bytes)
          FILE_SIGNATURES.each do |signature, format|
            return format if bytes.start_with?(signature)
          end

          # WebP is a RIFF container with "WEBP" at byte offsets 8..11.
          return "webp" if bytes.start_with?("RIFF") && bytes.byteslice(8, 4) == "WEBP"

          nil
        end

        # Read +path+ as binary after checking it exists and fits MAX_FILE_SIZE.
        # +label+ prefixes the "not found" message ("File" / "Image file").
        # Returns [data, size_in_bytes]; raises ArgumentError otherwise.
        def read_within_limit(path, label)
          raise ArgumentError, "#{label} not found: #{path}" unless File.exist?(path)

          file_size = File.size(path)
          if file_size > MAX_FILE_SIZE
            raise ArgumentError, "File too large: #{file_size} bytes (max: #{MAX_FILE_SIZE} bytes)"
          end

          [File.binread(path), file_size]
        end
      end
    end
  end
end