aia 0.5.17 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (51) hide show
  1. checksums.yaml +4 -4
  2. data/.envrc +1 -0
  3. data/.version +1 -2
  4. data/CHANGELOG.md +61 -22
  5. data/README.md +387 -227
  6. data/Rakefile +16 -5
  7. data/_notes.txt +231 -0
  8. data/bin/aia +3 -2
  9. data/examples/README.md +140 -0
  10. data/examples/headlines +21 -0
  11. data/justfile +16 -3
  12. data/lib/aia/ai_client_adapter.rb +210 -0
  13. data/lib/aia/chat_processor_service.rb +120 -0
  14. data/lib/aia/config.rb +473 -4
  15. data/lib/aia/context_manager.rb +58 -0
  16. data/lib/aia/directive_processor.rb +267 -0
  17. data/lib/aia/{tools/fzf.rb → fzf.rb} +9 -17
  18. data/lib/aia/history_manager.rb +85 -0
  19. data/lib/aia/prompt_handler.rb +178 -0
  20. data/lib/aia/session.rb +215 -0
  21. data/lib/aia/shell_command_executor.rb +109 -0
  22. data/lib/aia/ui_presenter.rb +110 -0
  23. data/lib/aia/utility.rb +24 -0
  24. data/lib/aia/version.rb +9 -6
  25. data/lib/aia.rb +57 -61
  26. data/lib/extensions/openstruct_merge.rb +44 -0
  27. metadata +43 -42
  28. data/LICENSE.txt +0 -21
  29. data/doc/aia_and_pre_compositional_prompts.md +0 -474
  30. data/lib/aia/clause.rb +0 -7
  31. data/lib/aia/cli.rb +0 -452
  32. data/lib/aia/directives.rb +0 -142
  33. data/lib/aia/dynamic_content.rb +0 -26
  34. data/lib/aia/logging.rb +0 -62
  35. data/lib/aia/main.rb +0 -265
  36. data/lib/aia/prompt.rb +0 -275
  37. data/lib/aia/tools/backend_common.rb +0 -58
  38. data/lib/aia/tools/client.rb +0 -197
  39. data/lib/aia/tools/editor.rb +0 -52
  40. data/lib/aia/tools/glow.rb +0 -90
  41. data/lib/aia/tools/llm.rb +0 -77
  42. data/lib/aia/tools/mods.rb +0 -100
  43. data/lib/aia/tools/sgpt.rb +0 -79
  44. data/lib/aia/tools/subl.rb +0 -68
  45. data/lib/aia/tools/vim.rb +0 -93
  46. data/lib/aia/tools.rb +0 -88
  47. data/lib/aia/user_query.rb +0 -21
  48. data/lib/core_ext/string_wrap.rb +0 -73
  49. data/lib/core_ext/tty-spinner_log.rb +0 -25
  50. data/man/aia.1 +0 -272
  51. data/man/aia.1.md +0 -236
data/Rakefile CHANGED
@@ -12,12 +12,23 @@ require 'kramdown/man/task'
12
12
  Kramdown::Man::Task.new
13
13
 
14
14
  require "bundler/gem_tasks"
15
- require "rake/testtask"
15
+ require "minitest/test_task"
16
16
 
17
- Rake::TestTask.new(:test) do |t|
18
- t.libs << "test"
19
- t.libs << "lib"
20
- t.test_files = FileList["test/**/*_test.rb"]
17
+ Minitest::TestTask.create(:test) do |t|
18
+ t.libs << "test"
19
+ t.libs << "lib"
20
+ t.warning = false
21
+ t.test_globs = ["test/aia/*_test.rb", "test/aia_test.rb", "!test/integration/**/*_test.rb"]
21
22
  end
22
23
 
24
+ Minitest::TestTask.create(:integration) do |t|
25
+ t.libs << "test"
26
+ t.libs << "lib"
27
+ t.warning = false
28
+ t.test_globs = ["test/integration/**/*_test.rb"]
29
+ end
30
+
31
+ desc "Run all tests including integration tests"
32
+ task all_tests: [:test, :integration]
33
+
23
34
  task default: :test
data/_notes.txt ADDED
@@ -0,0 +1,231 @@
1
+
2
+ --- 2025-02-01 18:01:36 -0600
3
+ I have no idea where I left off in this branch. The objective is to replace all the back-end processes with AiClient.
4
+
5
+ Tests are failing.
6
+
7
+ Made a few changes. It seems to be working in its basic modes.
8
+
9
+ --- 2025-02-21 20:13:19 -0600
10
+ Implemented Stark's clean slate protocol
11
+
12
+
13
+
14
+
15
+
16
+ --- 2025-03-29 21:39:46 -0500
17
+ Starting the refactor to take advantage of the new capability of the PromptManager gem.
18
+
19
+ lib/aia/chat_processor_service.rb
20
+
21
+
22
+
23
+ --- 2025-04-03 22:17:11 -0500
24
+ I have been trying to get multi-line input to work in the chat mode but have run into all kinds of problems. I think it would be best just to invoke the user's editor for that kind of operation. Also, I am not sure, but I think the same ask method is used for getting values for parameters. Changes may have been committed, but they should be reverted to the original before starting over.
25
+
26
+ def get_multiline_input
27
+ input_lines = []
28
+ current_line = ""
29
+ last_key_time = Time.now
30
+ waiting_printed = 0 # Track number of WAITING characters printed
31
+
32
+ STDIN.raw! # Enable raw mode for immediate keypress detection
33
+ begin
34
+ loop do
35
+ begin
36
+ r, _, _ = IO.select([STDIN], nil, nil, 0.1)
37
+ if r
38
+ char = STDIN.getc
39
+ last_key_time = Time.now
40
+ # Clear waiting characters when user types again
41
+ if waiting_printed > 0
42
+ print WAITING_ERASE * waiting_printed # Erase all waiting characters
43
+ $stdout.flush
44
+ waiting_printed = 0
45
+ end
46
+ else
47
+ if (Time.now - last_key_time >= KEYPRESS_TIMEUT) &&
48
+ waiting_printed == 0 &&
49
+ (!input_lines.empty? || !current_line.empty?)
50
+ print WAITING
51
+ $stdout.flush
52
+ waiting_printed = 1 # Record one '?' printed
53
+ end
54
+ next
55
+ end
56
+
57
+ rescue Interrupt
58
+ puts "\nInput cancelled. Discarding current input; please start over."
59
+ input_lines = []
60
+ current_line = ""
61
+ waiting_printed = 0
62
+ last_key_time = Time.now
63
+ next
64
+ end
65
+
66
+ break if char.nil? # Handle EOF (Ctrl+D)
67
+
68
+ if char == "\r" || char == "\n"
69
+ if current_line.empty? && !input_lines.empty?
70
+ break # Two Enters in a row submits
71
+ else
72
+ input_lines << current_line
73
+ current_line = ""
74
+ waiting_printed = 0 # Reset waiting on new line
75
+ print "\n\r"
76
+ $stdout.flush
77
+ end
78
+
79
+ elsif char == "\x04" # Ctrl+D
80
+ break
81
+
82
+ elsif char == "\x08" || char == "\x7F" # Backspace or Delete
83
+ if !current_line.empty?
84
+ current_line.chop!
85
+ print WAITING_ERASE
86
+ $stdout.flush
87
+ elsif waiting_printed > 0
88
+ # Clear one waiting character if current_line is empty
89
+ print "\b \b"
90
+ $stdout.flush
91
+ waiting_printed -= 1
92
+ end
93
+
94
+ else
95
+ current_line << char
96
+ print char
97
+ $stdout.flush
98
+ end
99
+ end
100
+
101
+ ensure
102
+ STDIN.cooked! # Restore terminal to normal mode
103
+ end
104
+
105
+ input_lines << current_line unless current_line.empty?
106
+
107
+ # Handle single-line special case
108
+ if input_lines.size == 1
109
+ if special_first_line_processing(input_lines.first)
110
+ # If special (starts with "//"), return immediately as if double return was pressed
111
+ return input_lines.first
112
+ else
113
+ # If not special, keep as is and return the full input
114
+ return input_lines.join("\n")
115
+ end
116
+ end
117
+
118
+ input_lines.join("\n").tap do |result|
119
+ puts "\n" if result.empty? # Clean up display if no input
120
+ end
121
+
122
+ rescue EOFError
123
+ input_lines.join("\n")
124
+ end
125
+
126
+
127
+
128
+ --- 2025-04-03 22:18:18 -0500
129
+ Using `subl -w` for multi-line input in chat mode gives us the ability to write ERB for chat input.
130
+
131
+ def get_multiline_input
132
+ input_lines = []
133
+ current_line = ""
134
+ last_key_time = Time.now
135
+ waiting_printed = 0 # Track number of WAITING characters printed
136
+
137
+ STDIN.raw! # Enable raw mode for immediate keypress detection
138
+ begin
139
+ loop do
140
+ begin
141
+ r, _, _ = IO.select([STDIN], nil, nil, 0.1)
142
+ if r
143
+ char = STDIN.getc
144
+ last_key_time = Time.now
145
+ # Clear waiting characters when user types again
146
+ if waiting_printed > 0
147
+ print WAITING_ERASE * waiting_printed # Erase all waiting characters
148
+ $stdout.flush
149
+ waiting_printed = 0
150
+ end
151
+ else
152
+ if (Time.now - last_key_time >= KEYPRESS_TIMEUT) &&
153
+ waiting_printed == 0 &&
154
+ (!input_lines.empty? || !current_line.empty?)
155
+ print WAITING
156
+ $stdout.flush
157
+ waiting_printed = 1 # Record one '?' printed
158
+ end
159
+ next
160
+ end
161
+
162
+ rescue Interrupt
163
+ puts "\nInput cancelled. Discarding current input; please start over."
164
+ input_lines = []
165
+ current_line = ""
166
+ waiting_printed = 0
167
+ last_key_time = Time.now
168
+ next
169
+ end
170
+
171
+ break if char.nil? # Handle EOF (Ctrl+D)
172
+
173
+ if char == "\r" || char == "\n"
174
+ if current_line.empty? && !input_lines.empty?
175
+ break # Two Enters in a row submits
176
+ else
177
+ input_lines << current_line
178
+ current_line = ""
179
+ waiting_printed = 0 # Reset waiting on new line
180
+ print "\n\r"
181
+ $stdout.flush
182
+ end
183
+
184
+ elsif char == "\x04" # Ctrl+D
185
+ break
186
+
187
+ elsif char == "\x08" || char == "\x7F" # Backspace or Delete
188
+ if !current_line.empty?
189
+ current_line.chop!
190
+ print WAITING_ERASE
191
+ $stdout.flush
192
+ elsif waiting_printed > 0
193
+ # Clear one waiting character if current_line is empty
194
+ print "\b \b"
195
+ $stdout.flush
196
+ waiting_printed -= 1
197
+ end
198
+
199
+ else
200
+ current_line << char
201
+ print char
202
+ $stdout.flush
203
+ end
204
+ end
205
+
206
+ ensure
207
+ STDIN.cooked! # Restore terminal to normal mode
208
+ end
209
+
210
+ input_lines << current_line unless current_line.empty?
211
+
212
+ # Handle single-line special case
213
+ if input_lines.size == 1
214
+ if special_first_line_processing(input_lines.first)
215
+ # If special (starts with "//"), return immediately as if double return was pressed
216
+ return input_lines.first
217
+ else
218
+ # If not special, keep as is and return the full input
219
+ return input_lines.join("\n")
220
+ end
221
+ end
222
+
223
+ input_lines.join("\n").tap do |result|
224
+ puts "\n" if result.empty? # Clean up display if no input
225
+ end
226
+
227
+ rescue EOFError
228
+ input_lines.join("\n")
229
+ end
230
+
231
+
data/bin/aia CHANGED
@@ -1,5 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
- # bin/aia
2
+ # frozen_string_literal: true
3
+
4
+ require_relative '../lib/aia'
3
5
 
4
- require 'aia'
5
6
  AIA.run
@@ -0,0 +1,140 @@
1
+ # Executable Prompts
2
+
3
+ Executable prompts are a powerful feature of the AI Assistant (`aia`) that enable users to create and run command-line utilities tailored to interact with AI models. These prompts can automate tasks, generate content, and incorporate custom configurations, making them highly versatile for various applications. In this section, we will delve deeper into the nature of executable prompts, how to configure them with directives, and provide an overview of practical examples.
4
+
5
+ ## What is an Executable Prompt?
6
+
7
+ An executable prompt is a special type of prompt file that is structured to allow execution through the `aia` command-line interface. By including specific command-line behavior in the form of a shebang line, the prompt can be invoked directly from the terminal like any other executable script.
8
+
9
+ ### Structure of an Executable Prompt
10
+
11
+ 1. **Shebang Line**: The first line of the prompt indicates how the file should be executed. For instance:
12
+ ```bash
13
+ #!/usr/bin/env aia run --no-out_file
14
+ ```
15
+
16
+ This line tells the system to use the `aia` CLI with the `run` prompt ID for execution, while `--no-out_file` indicates that output should be sent to STDOUT instead of being written to a file.
17
+
18
+ 2. **Content**: Below the shebang line, users can add the prompt content that will be sent to the AI model for processing. This content can use flexible directives and dynamic parameters.
19
+
20
+ ### Example of an Executable Prompt
21
+
22
+ ```bash
23
+ #!/usr/bin/env aia run --no-out_file
24
+ # File: top10
25
+ # Desc: The top 10 cities by population
26
+
27
+ What are the top 10 cities by population in the USA? Summarize what people like about living in each city. Include an average cost of living and links to the respective Wikipedia pages. Format your response as a markdown document.
28
+ ```
29
+
30
+ After making this script executable with `chmod +x top10`, it can be run directly in the terminal:
31
+
32
+ ```bash
33
+ ./top10
34
+ ```
35
+
36
+ ## Using Directives to Configure Execution
37
+
38
+ Directives embedded within executable prompts allow users to configure various execution parameters dynamically. These directives are special commands within the prompt text that guide the prompt's behavior when processed by the AI model.
39
+
40
+ ### Available Directives
41
+
42
+ #### 1. **//config**
43
+
44
+ The `//config` directive is used to modify configuration settings specifically for a particular execution of the prompt. You can set various parameters such as model selection, output handling, or chat mode:
45
+
46
+ ```bash
47
+ //config model = gpt-4
48
+ //config out_file = results.md
49
+ ```
50
+
51
+ **Example**: You can control the model and output settings dynamically without changing global or default settings.
52
+
53
+ #### 2. **//include**
54
+
55
+ This directive allows the inclusion of external files or content right into the prompt. This can be useful for injecting multiple lines of data or complex configurations:
56
+
57
+ ```bash
58
+ //include path/to/config.txt
59
+ ```
60
+
61
+ This will read the content of `config.txt` and prepend it to the prompt context.
62
+
63
+ #### 3. **//shell**
64
+
65
+ Execute shell commands dynamically and include their results in the prompt. This integration can enhance your prompts by feeding them real-time data:
66
+
67
+ ```bash
68
+ //shell echo $(pwd)
69
+ ```
70
+
71
+ This would prepend the current working directory to the prompt's input.
72
+
73
+ ### Parameterization
74
+
75
+ Executable prompts can also accept parameters or keywords that users define themselves. For instance:
76
+
77
+ ```bash
78
+ [MY_TOPIC]
79
+ ```
80
+
81
+ When the prompt runs, users will be prompted to provide a value for `MY_TOPIC`, allowing for flexible and dynamic conversations with the AI model.
82
+
83
+ ## Benefits of Executable Prompts
84
+
85
+ 1. **Automation**: Automate complex tasks by wrapping them in a reusable script.
86
+ 2. **Dynamic Content**: Use directives to dynamically adjust settings, include external data, and run system commands.
87
+ 3. **Ease of Use**: Users can execute prompts directly from the terminal without entering the `aia` command each time.
88
+ 4. **Configuration Flexibility**: Tailor specific prompt executions without altering global settings, giving you full control over the runtime environment.
89
+
90
+ ## Practical Examples of Executable Prompts
91
+
92
+ ### Example 1: Top 10 Cities Script
93
+
94
+ Create a script that gives information on the top cities in the USA:
95
+
96
+ ```bash
97
+ #!/usr/bin/env aia run --no-out_file
98
+ # File: top10
99
+ # Desc: Retrieves top 10 cities by population
100
+
101
+ //config out_file=top10_cities.md
102
+
103
+ What are the top 10 cities by population in the USA? Summarize what people like about living in each city and include links to their respective Wikipedia pages.
104
+ ```
105
+
106
+ ### Example 2: Weather Report
107
+
108
+ ```bash
109
+ #!/usr/bin/env aia run
110
+ # File: weather_report
111
+ # Desc: Gets today's weather
112
+
113
+ //shell curl -s 'wttr.in/Toronto?format=3'
114
+
115
+ Today's weather in Toronto is: $(cat weather_output.txt).
116
+ ```
117
+
118
+ ### Example 3: Dynamic Task Execution
119
+
120
+ A user can create a prompt for performing mathematical calculations:
121
+
122
+ ```bash
123
+ #!/usr/bin/env aia run
124
+ # File: calculate
125
+ # Desc: Simple calculator
126
+
127
+ //config model=gpt-3
128
+
129
+ Please calculate [MATH_EXPRESSION].
130
+ ```
131
+
132
+ When executed, the user is prompted to input the mathematical expression they want to calculate.
133
+
134
+ ### Example of Using STDOUT and Piping
135
+
136
+ You can combine executable prompts with other shell commands to further manipulate the output:
137
+
138
+ ```bash
139
+ ./top10 | glow # Pipe output to glow for rendering markdown in the terminal
140
+ ```
@@ -0,0 +1,21 @@
1
+ #!/usr/bin/env aia run --no-out_file
2
+ # File: examples/headlines
3
+ # Desc: retrieves the news.google.com website
4
+ # extracts and formats the headlines
5
+ # and prints them to the console
6
+
7
+ //config shell = true
8
+ # //config
9
+
10
+ # Puts the webpage into index.html
11
+ # //shell wget2 https://news.google.com
12
+
13
+ $(wget2 https://news.google.com)
14
+
15
+
16
+ # Let's hear the headlines as well as read them.
17
+ //config speak = true
18
+
19
+ Extract and summarize the headlines from the following text:
20
+
21
+ $(html2text index.html)
data/justfile CHANGED
@@ -23,9 +23,10 @@ man_filepath := env_var('RR') + "/man/aia.1.md"
23
23
 
24
24
  set fallback # search up for recipe name if not found locally.
25
25
 
26
- set positional-arguments := true
27
- set allow-duplicate-recipes := true
28
- set dotenv-load := false
26
+ set positional-arguments := true
27
+ set allow-duplicate-recipes := true
28
+ set allow-duplicate-variables := true
29
+ set dotenv-load := false
29
30
 
30
31
  # my common variables
31
32
 
@@ -80,6 +81,18 @@ mods_delete_all:
80
81
  mods -l | awk '{print $1}' | xargs -I {} mods -d {}
81
82
 
82
83
 
84
+ #############################################
85
+ ## iTerm2-related
86
+
87
+ # Fix half-duplex terminal
88
+ fix:
89
+ stty sane
90
+
91
+
92
+ # Clear the scroll-back buffer
93
+ @clear_buffer:
94
+ printf "\e[3J"
95
+
83
96
  #################################################
84
97
  ## Private recipes
85
98
 
@@ -0,0 +1,210 @@
1
+ # lib/aia/ai_client_adapter.rb
2
+ #
3
+
4
+
5
+ require 'ai_client'
6
+ require 'tty-spinner'
7
+
8
+
9
+ module AIA
10
+ class AIClientAdapter
11
+ def initialize
12
+ @model = AIA.config.model
13
+
14
+ model_info = extract_model_parts(@model)
15
+ @client = AiClient.new(
16
+ model_info[:model],
17
+ provider: model_info[:provider]
18
+ )
19
+ end
20
+
21
+
22
+ def chat(prompt)
23
+ if @model.downcase.include?('dall-e') || @model.downcase.include?('image-generation')
24
+ text_to_image(prompt)
25
+ elsif @model.downcase.include?('vision') || @model.downcase.include?('image')
26
+ image_to_text(prompt)
27
+ elsif @model.downcase.include?('tts') || @model.downcase.include?('speech')
28
+ text_to_audio(prompt)
29
+ elsif @model.downcase.include?('whisper') || @model.downcase.include?('transcription')
30
+ audio_to_text(prompt)
31
+ else
32
+ text_to_text(prompt)
33
+ end
34
+ end
35
+
36
+
37
+
38
+ def transcribe(audio_file)
39
+ @client.transcribe(audio_file)
40
+ end
41
+
42
+
43
+
44
+ def speak(text)
45
+ output_file = "#{Time.now.to_i}.mp3"
46
+
47
+ begin
48
+ # Try with options
49
+ @client.speak(text, output_file, {
50
+ model: AIA.config.speech_model,
51
+ voice: AIA.config.voice
52
+ })
53
+ rescue ArgumentError
54
+ @client.speak(text)
55
+ end
56
+
57
+ system("#{AIA.config.speak_command} #{output_file}") if File.exist?(output_file) && system("which #{AIA.config.speak_command} > /dev/null 2>&1")
58
+ end
59
+
60
+ def method_missing(method, *args, &block)
61
+ if @client.respond_to?(method)
62
+ @client.public_send(method, *args, &block)
63
+ else
64
+ super
65
+ end
66
+ end
67
+
68
+ def respond_to_missing?(method, include_private = false)
69
+ @client.respond_to?(method) || super
70
+ end
71
+
72
+ private
73
+
74
+
75
+
76
+ def extract_model_parts(model_string)
77
+ parts = model_string.split('/')
78
+ parts.map!(&:strip)
79
+
80
+ if parts.length > 1
81
+ provider = parts[0]
82
+ model = parts[1]
83
+ else
84
+ provider = nil # AiClient will figure it out from the model name
85
+ model = parts[0]
86
+ end
87
+
88
+
89
+
90
+ { provider: provider, model: model }
91
+ end
92
+
93
+
94
+
95
+ def extract_text_prompt(prompt)
96
+
97
+ if prompt.is_a?(String)
98
+ prompt
99
+ elsif prompt.is_a?(Hash) && prompt[:text]
100
+ prompt[:text]
101
+ elsif prompt.is_a?(Hash) && prompt[:content]
102
+ prompt[:content]
103
+ else
104
+ prompt.to_s
105
+ end
106
+ end
107
+
108
+
109
+
110
+ def text_to_text(prompt)
111
+ text_prompt = extract_text_prompt(prompt)
112
+ @client.chat(text_prompt)
113
+ end
114
+
115
+
116
+
117
+ def text_to_image(prompt)
118
+ text_prompt = extract_text_prompt(prompt)
119
+
120
+
121
+ output_file = "#{Time.now.to_i}.png"
122
+
123
+ begin
124
+ begin
125
+ @client.generate_image(text_prompt, output_file, {
126
+ size: AIA.config.image_size,
127
+ quality: AIA.config.image_quality,
128
+ style: AIA.config.image_style
129
+ })
130
+ rescue ArgumentError
131
+ @client.generate_image(text_prompt)
132
+ end
133
+
134
+ "Image generated and saved to: #{output_file}"
135
+ rescue => e
136
+ "Error generating image: #{e.message}"
137
+ end
138
+ end
139
+
140
+
141
+
142
+ def image_to_text(prompt)
143
+ image_path = extract_image_path(prompt)
144
+ text_prompt = extract_text_prompt(prompt)
145
+
146
+ if image_path && File.exist?(image_path)
147
+ begin
148
+ @client.chat("#{text_prompt}\n[Analyzing image: #{image_path}]")
149
+ rescue => e
150
+ "Error analyzing image: #{e.message}"
151
+ end
152
+ else
153
+ text_to_text(prompt)
154
+ end
155
+ end
156
+
157
+
158
+
159
+ def text_to_audio(prompt)
160
+ text_prompt = extract_text_prompt(prompt)
161
+
162
+ output_file = "#{Time.now.to_i}.mp3"
163
+
164
+ begin
165
+ begin
166
+ @client.speak(text_prompt, output_file, {
167
+ model: AIA.config.speech_model,
168
+ voice: AIA.config.voice
169
+ })
170
+ rescue ArgumentError
171
+ @client.speak(text_prompt)
172
+ end
173
+
174
+ system("#{AIA.config.speak_command} #{output_file}") if File.exist?(output_file) && system("which #{AIA.config.speak_command} > /dev/null 2>&1")
175
+
176
+ "Audio generated and saved to: #{output_file}"
177
+ rescue => e
178
+ "Error generating audio: #{e.message}"
179
+ end
180
+ end
181
+
182
+
183
+
184
+ def audio_to_text(prompt)
185
+ if prompt.is_a?(String) && File.exist?(prompt) &&
186
+ prompt.downcase.end_with?('.mp3', '.wav', '.m4a', '.flac')
187
+ begin
188
+ @client.transcribe(prompt)
189
+ rescue => e
190
+ "Error transcribing audio: #{e.message}"
191
+ end
192
+ else
193
+ # Fall back to regular chat if no valid audio file is found
194
+ text_to_text(prompt)
195
+ end
196
+ end
197
+
198
+
199
+
200
+ def extract_image_path(prompt)
201
+ if prompt.is_a?(String)
202
+ prompt.scan(/\b[\w\/\.\-]+\.(jpg|jpeg|png|gif|webp)\b/i).first&.first
203
+ elsif prompt.is_a?(Hash)
204
+ prompt[:image] || prompt[:image_path]
205
+ else
206
+ nil
207
+ end
208
+ end
209
+ end
210
+ end