personality 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,412 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "mcp"
4
+ require "mcp/transports/stdio"
5
+ require "json"
6
+ require "open3"
7
+ require "tempfile"
8
+ require "shellwords"
9
+
10
+ module Personality
11
+ module MCP
12
+ class VoiceServer
13
# Moto G52 phone configuration - ADB over WiFi.
# Each value may be overridden through the environment; the defaults are the
# addresses that used to be hard-coded.
PHONE_IP = ENV.fetch("VOICE_PHONE_IP", "192.168.88.155")
PHONE_PORT = ENV.fetch("VOICE_PHONE_PORT", "5555")
PHONE_ADB = "#{PHONE_IP}:#{PHONE_PORT}"
TERMUX_HOME = "/data/data/com.termux/files/home"

# Junkpile server configuration (192.168.88.165 for WiFi access).
# JUNKPILE_SSH is the ssh host alias used for every remote command.
JUNKPILE_SSH = ENV.fetch("VOICE_JUNKPILE_SSH", "j")
JUNKPILE_IP = ENV.fetch("VOICE_JUNKPILE_IP", "192.168.88.165")
WHISPER_PATH = ENV.fetch("VOICE_WHISPER_PATH", "~/.local/bin/whisper")
CLAUDE_PATH = ENV.fetch("VOICE_CLAUDE_PATH", "/home/linuxbrew/.linuxbrew/bin/claude")
24
+
25
# Convenience entry point: builds a server instance and starts it.
def self.run
  instance = new
  instance.start
end
28
+
29
# Builds the MCP server named "voice", gives it an empty server context and
# registers every voice tool on it.
def initialize
  @server = ::MCP::Server.new(name: "voice", version: Personality::VERSION).tap do |mcp_server|
    mcp_server.server_context = {}
  end
  register_tools
end
37
+
38
# Opens a stdio transport bound to the configured MCP server.
def start
  ::MCP::Transports::StdioTransport.new(@server).open
end
42
+
43
+ private
44
+
45
# Wraps +result+ in an MCP tool response holding a single text item whose
# text is the JSON encoding of +result+.
def tool_response(result)
  text_item = {type: "text", text: JSON.generate(result)}
  ::MCP::Tool::Response.new([text_item])
end
48
+
49
# Registers every voice tool, in a fixed order, and returns whatever the last
# registration returned.
def register_tools
  %i[
    register_voice_record
    register_voice_transcribe
    register_voice_ask
    register_voice_listen
    register_voice_status
  ].map { |registrar| send(registrar) }.last
end
56
+
57
+ # === Voice Record Tool ===
58
+ # Records audio from the Moto G52 phone via Termux
59
+
60
# Registers the "voice_record" tool.
#
# The tool records audio on the phone and returns the result of
# record_from_phone as a JSON text response. The requested duration is
# coerced to an integer and clamped to 1..60 seconds, so zero, negative or
# oversized values never reach the phone.
def register_voice_record
  @server.define_tool(
    name: "voice_record",
    description: "Record audio from the Moto G52 phone via Termux. Returns path to the recorded WAV file on junkpile.",
    input_schema: {
      type: "object",
      properties: {
        duration: {type: "integer", description: "Recording duration in seconds (default: 8, max: 60)"},
        output_path: {type: "string", description: "Output path on junkpile (default: /tmp/phone_voice.wav)"}
      }
    }
  ) do |server_context:, **opts|
    duration = (opts[:duration] || 8).to_i.clamp(1, 60)
    output_path = opts[:output_path] || "/tmp/phone_voice.wav"

    result = record_from_phone(duration: duration, output_path: output_path)
    tool_response(result)
  end
end
79
+
80
+ # === Voice Transcribe Tool ===
81
+ # Transcribes audio using Whisper on junkpile
82
+
83
# Registers the "voice_transcribe" tool, which forwards to transcribe_audio
# (model defaults to "small", language to "en") and wraps the result as a
# JSON text response.
def register_voice_transcribe
  schema = {
    type: "object",
    properties: {
      audio_path: {type: "string", description: "Path to audio file on junkpile"},
      model: {type: "string", description: "Whisper model to use (default: small). Options: tiny, base, small, medium, large"},
      language: {type: "string", description: "Language code (default: en)"}
    },
    required: %w[audio_path]
  }

  @server.define_tool(
    name: "voice_transcribe",
    description: "Transcribe audio file using Whisper STT on junkpile. Returns the transcribed text.",
    input_schema: schema
  ) do |audio_path:, server_context:, **opts|
    tool_response(
      transcribe_audio(
        audio_path: audio_path,
        model: opts[:model] || "small",
        language: opts[:language] || "en"
      )
    )
  end
end
104
+
105
+ # === Voice Ask Tool ===
106
+ # Full pipeline: record -> transcribe -> Claude -> TTS response
107
+
108
# Registers the "voice_ask" tool (record -> transcribe -> Claude -> TTS).
#
# The duration is coerced to an integer and clamped to 1..60 seconds, the
# same limit the "voice_record" tool advertises, so both entry points to the
# recorder agree. Other options fall back to their documented defaults.
def register_voice_ask
  @server.define_tool(
    name: "voice_ask",
    description: "Full voice pipeline: record audio from phone, transcribe with Whisper, send to Claude, and speak the response. Returns the transcript and response.",
    input_schema: {
      type: "object",
      properties: {
        duration: {type: "integer", description: "Recording duration in seconds (default: 8, max: 60)"},
        model: {type: "string", description: "Whisper model (default: small)"},
        speak_response: {type: "boolean", description: "Speak the response via TTS (default: true)"},
        voice: {type: "string", description: "TTS voice to use (default: bt7274)"}
      }
    }
  ) do |server_context:, **opts|
    duration = (opts[:duration] || 8).to_i.clamp(1, 60)
    model = opts[:model] || "small"
    # fetch (not ||) so an explicit `false` is respected.
    speak_response = opts.fetch(:speak_response, true)
    voice = opts[:voice] || "bt7274"

    result = voice_ask_pipeline(
      duration: duration,
      model: model,
      speak_response: speak_response,
      voice: voice
    )
    tool_response(result)
  end
end
136
+
137
+ # === Voice Listen Tool ===
138
+ # Starts continuous wake word listening (placeholder for future)
139
+
140
# Registers the "voice_listen" tool, which forwards to manage_wake_listener
# (action defaults to "status", wake word to "hey b t").
def register_voice_listen
  schema = {
    type: "object",
    properties: {
      wake_word: {type: "string", description: "Wake word to listen for (default: hey b t)"},
      action: {type: "string", enum: %w[start stop status], description: "Action: start, stop, or status (default: status)"}
    }
  }

  @server.define_tool(
    name: "voice_listen",
    description: "Start continuous wake word listening on the phone (using Vosk). Currently returns status of the listener service.",
    input_schema: schema
  ) do |server_context:, **opts|
    tool_response(
      manage_wake_listener(
        action: opts[:action] || "status",
        wake_word: opts[:wake_word] || "hey b t"
      )
    )
  end
end
159
+
160
+ # === Voice Status Tool ===
161
+ # Check connectivity and status of voice components
162
+
163
# Registers the argument-less "voice_status" tool, which returns the result
# of check_voice_status as a JSON text response.
def register_voice_status
  no_arguments = {type: "object", properties: {}}

  @server.define_tool(
    name: "voice_status",
    description: "Check status of voice pipeline components: phone connectivity, junkpile availability, Whisper installation.",
    input_schema: no_arguments
  ) do |server_context:, **|
    tool_response(check_voice_status)
  end
end
173
+
174
+ # === Implementation Methods ===
175
+
176
# Records audio on the phone and copies the WAV file to junkpile.
#
# Steps: connect ADB, record with termux-microphone-record, stream the file
# back through `run-as ... cat`, scp it to +output_path+, then clean up.
#
# Every program is started with an argument vector, so no local shell ever
# parses +output_path+ or +duration+. The local copy lives in a Tempfile
# that is removed on every exit path, and the recording left on the phone is
# deleted even when a later step fails.
#
# NOTE(review): how the remote side of scp interprets +output_path+ depends
# on the scp protocol in use on junkpile - confirm before accepting paths
# from untrusted callers.
#
# @param duration [Integer] seconds to record; must be a positive integer
# @param output_path [String] destination path on junkpile
# @return [Hash] on success: {success: true, duration:, output_path:, message:};
#   on failure: {success: false, error:, details:}
def record_from_phone(duration:, output_path:)
  seconds = Integer(duration, exception: false)
  unless seconds&.positive?
    return {success: false, error: "Invalid recording duration", details: duration.inspect}
  end

  phone_audio = "#{TERMUX_HOME}/voice_cmd.wav"
  adb = ["adb", "-s", PHONE_ADB]

  # Ensure ADB is connected (best effort; a failure shows up in the next step).
  Open3.capture2e("adb", "connect", PHONE_ADB)

  begin
    # Step 1: Record audio on phone via ADB + run-as
    record_output, record_status = Open3.capture2e(
      *adb, "shell",
      "run-as com.termux #{TERMUX_HOME}/../usr/bin/termux-microphone-record -f #{phone_audio} -l #{seconds}"
    )
    unless record_status.success?
      return {success: false, error: "Failed to record on phone", details: record_output.strip}
    end

    Tempfile.create(["voice_cmd_local", ".wav"]) do |local|
      local.binmode

      # Step 2: Pull audio from phone via ADB (stdout goes straight to the tempfile)
      pulled = system(*adb, "shell", "run-as com.termux cat #{phone_audio}", out: local, err: File::NULL)
      size = File.size(local.path)
      unless pulled && size.positive?
        return {success: false, error: "Failed to pull audio from phone", details: "File size: #{size}"}
      end

      # Step 3: Transfer to junkpile
      scp_output, scp_status = Open3.capture2e("scp", "-q", local.path, "#{JUNKPILE_SSH}:#{output_path}")
      unless scp_status.success?
        return {success: false, error: "Failed to transfer audio to junkpile", details: scp_output.strip}
      end
    end
  ensure
    # Step 4: Cleanup on the phone, regardless of how the steps above ended.
    Open3.capture2e(*adb, "shell", "run-as com.termux rm -f #{phone_audio}")
  end

  {
    success: true,
    duration: duration,
    output_path: output_path,
    message: "Recorded #{duration}s of audio"
  }
rescue Errno::ENOENT => e
  # adb or scp is not installed locally; report it instead of raising.
  {success: false, error: "Required command not found", details: e.message}
end
231
+
232
# Converts +audio_path+ to 16 kHz mono with ffmpeg and transcribes it with
# Whisper, both on junkpile over ssh.
#
# Differences from a naive shell pipeline, all deliberate:
# * +model+ and +language+ are shell-escaped before entering the remote script.
# * ssh is started with an argument vector and stderr is captured separately,
#   so an ssh error message can never be returned as a transcript.
# * Any transcript left by a previous run is removed first, so a failed
#   Whisper run cannot return stale text.
# * Whitespace is normalised in Ruby rather than with `xargs`, which aborts
#   on input containing an unmatched apostrophe.
#
# @param audio_path [String] path to the audio file on junkpile
# @param model [String] Whisper model name
# @param language [String] language code
# @return [Hash] {success: true, transcript:, model:, language:, audio_path:}
#   or {success: false, error:, audio_path:} (plus :details when available)
def transcribe_audio(audio_path:, model:, language:)
  converted_path = "/tmp/voice_converted.wav"
  transcript_path = "/tmp/voice_converted.txt"

  script = <<~BASH
    export PATH=~/.local/bin:$PATH
    rm -f #{transcript_path}
    ffmpeg -i #{Shellwords.escape(audio_path)} -ar 16000 -ac 1 #{converted_path} -y 2>/dev/null
    #{WHISPER_PATH} #{converted_path} --model #{Shellwords.escape(model.to_s)} --language #{Shellwords.escape(language.to_s)} --output_format txt --output_dir /tmp 2>/dev/null
    cat #{transcript_path} 2>/dev/null || true
  BASH

  stdout, stderr, status = Open3.capture3("ssh", JUNKPILE_SSH, "bash -c #{Shellwords.escape(script)}")

  # The script itself always exits 0, so a failure here is ssh/bash level.
  unless status.success?
    return {
      success: false,
      error: "Transcription command failed",
      details: stderr.strip,
      audio_path: audio_path
    }
  end

  # Collapse newlines and runs of whitespace into single spaces.
  transcript = stdout.split.join(" ")

  if transcript.empty?
    return {
      success: false,
      error: "Transcription returned empty result",
      audio_path: audio_path
    }
  end

  {
    success: true,
    transcript: transcript,
    model: model,
    language: language,
    audio_path: audio_path
  }
rescue Errno::ENOENT => e
  # ssh is not installed locally.
  {success: false, error: "Transcription command failed", details: e.message, audio_path: audio_path}
end
265
+
266
# Runs the full voice pipeline: record -> transcribe -> Claude -> optional TTS.
#
# A failing stage returns that stage's result hash with a :stage key added.
# The Claude call keeps stderr apart from stdout, so an ssh error is reported
# as a failure instead of being treated (and spoken) as the answer.
# :spoke_response reflects whether TTS actually succeeded; a TTS failure is
# reported under :tts_error without failing the whole request.
#
# @param duration [Integer] seconds to record
# @param model [String] Whisper model name
# @param speak_response [Boolean] whether to speak the answer
# @param voice [String] TTS voice name
# @return [Hash] {success: true, transcript:, response:, spoke_response:,
#   voice:, duration:, model:} or a failure hash with :stage
def voice_ask_pipeline(duration:, model:, speak_response:, voice:)
  # Step 1: Record
  audio_path = "/tmp/phone_voice.wav"
  record_result = record_from_phone(duration: duration, output_path: audio_path)
  return record_result.merge(stage: "record") unless record_result[:success]

  # Step 2: Transcribe
  transcribe_result = transcribe_audio(audio_path: audio_path, model: model, language: "en")
  return transcribe_result.merge(stage: "transcribe") unless transcribe_result[:success]

  transcript = transcribe_result[:transcript]

  # Step 3: Get Claude response on junkpile
  prompt = "Voice command from user: #{transcript}\nRespond concisely (1-2 sentences max)."

  claude_cmd = <<~BASH
    export PATH=/home/linuxbrew/.linuxbrew/bin:$PATH
    #{CLAUDE_PATH} --print --output-format stream-json --verbose #{Shellwords.escape(prompt)} 2>/dev/null | grep '"type":"assistant"' | head -1 | jq -r '.message.content[0].text // empty'
  BASH

  begin
    stdout, stderr, = Open3.capture3("ssh", JUNKPILE_SSH, "bash -c #{Shellwords.escape(claude_cmd)}")
  rescue Errno::ENOENT => e
    # ssh is not installed locally.
    stdout = ""
    stderr = e.message
  end
  response = stdout.strip

  if response.empty?
    failure = {
      success: false,
      error: "Claude returned empty response",
      transcript: transcript,
      stage: "claude"
    }
    failure[:details] = stderr.strip unless stderr.strip.empty?
    return failure
  end

  # Step 4: Speak response (if enabled)
  spoke = false
  tts_error = nil
  if speak_response
    tts_result = speak_text(text: response, voice: voice)
    tts_error = tts_result[:error] if tts_result.is_a?(Hash)
    spoke = tts_error.nil?
  end

  result = {
    success: true,
    transcript: transcript,
    response: response,
    spoke_response: spoke,
    voice: voice,
    duration: duration,
    model: model
  }
  result[:tts_error] = tts_error if tts_error
  result
end
321
+
322
# Speaks +text+ through Personality::TTS with the given +voice+.
# The TTS file is loaded on first use; any StandardError raised while
# speaking is converted into an {error: message} hash.
def speak_text(text:, voice:)
  require_relative "../tts"
  Personality::TTS.speak(text, voice: voice)
rescue => failure
  {error: failure.message}
end
329
+
330
# Handles the wake-word listener actions.
#
# "start" and "stop" are placeholders and always report success: false.
# "status" asks the phone, over ADB, whether a process matching "vosk_wake"
# exists. The listener counts as running only when pgrep printed a PID; if
# the ADB command itself fails, the status is "unknown" rather than being
# mistaken for a running listener.
#
# @param action [String] "start", "stop" or "status"
# @param wake_word [String] wake word echoed back in the result
# @return [Hash] action-specific result hash
def manage_wake_listener(action:, wake_word:)
  case action
  when "start"
    # Future: Start Vosk wake word listener on phone
    {
      success: false,
      message: "Wake word listener not yet implemented. Use voice_record for manual recording.",
      wake_word: wake_word
    }
  when "stop"
    {
      success: false,
      message: "Wake word listener not yet implemented"
    }
  when "status"
    begin
      output, probe = Open3.capture2e(
        "adb", "-s", PHONE_ADB, "shell",
        "run-as com.termux pgrep -f vosk_wake || echo not_running"
      )
    rescue Errno::ENOENT => e
      # adb is not installed locally.
      return {status: "unknown", wake_word: wake_word, message: "Could not query phone: #{e.message}"}
    end

    unless probe.success?
      return {status: "unknown", wake_word: wake_word, message: "Could not query phone: #{output.strip}"}
    end

    # pgrep prints one PID per line; anything else means no listener.
    running = output.each_line.any? { |line| line.strip.match?(/\A\d+\z/) }

    {
      status: running ? "running" : "stopped",
      wake_word: wake_word,
      message: running ? "Wake word listener is active" : "Wake word listener is not running"
    }
  else
    {success: false, error: "Unknown action: #{action}"}
  end
end
360
+
361
# Reports the health of each voice pipeline component.
#
# Probes run with an argument vector and with stderr discarded, so ssh/adb
# warnings cannot corrupt the "ok" marker. Every ssh probe uses a short
# connect timeout. Whisper and Claude are checked only when junkpile is
# reachable, so :whisper and :claude are absent otherwise.
#
# @return [Hash] {phone:, junkpile:, whisper:?, claude:?, ready:, message:}
def check_voice_status
  status = {}
  ssh = ["ssh", "-o", "ConnectTimeout=3", JUNKPILE_SSH]

  # Check phone connectivity via ADB (connect is best effort)
  voice_probe_ok?("adb", "connect", PHONE_ADB)
  status[:phone] = {
    host: PHONE_ADB,
    method: "adb_wifi",
    connected: voice_probe_ok?("adb", "-s", PHONE_ADB, "shell", "echo ok")
  }

  # Check junkpile connectivity
  status[:junkpile] = {
    ssh_alias: JUNKPILE_SSH,
    connected: voice_probe_ok?(*ssh, "echo ok")
  }

  if status[:junkpile][:connected]
    # Check Whisper installation on junkpile
    status[:whisper] = {
      path: WHISPER_PATH,
      installed: voice_probe_ok?(*ssh, "test -x #{WHISPER_PATH} && echo ok")
    }

    # Check Claude CLI on junkpile
    status[:claude] = {
      path: CLAUDE_PATH,
      installed: voice_probe_ok?(*ssh, "test -x #{CLAUDE_PATH} && echo ok")
    }
  end

  # Overall status
  all_ok = [
    status[:phone][:connected],
    status[:junkpile][:connected],
    status.dig(:whisper, :installed),
    status.dig(:claude, :installed)
  ].all?

  status[:ready] = all_ok
  status[:message] = all_ok ? "Voice pipeline ready" : "Some components unavailable"

  status
end

# Runs +argv+ without a shell and returns true when its stdout contains a
# line that is exactly "ok". A missing binary or OS-level failure is false.
def voice_probe_ok?(*argv)
  stdout, = Open3.capture2(*argv, err: File::NULL)
  stdout.each_line.any? { |line| line.strip == "ok" }
rescue SystemCallError
  false
end
410
+ end
411
+ end
412
+ end
@@ -2,40 +2,52 @@
2
2
 
3
3
  require "open3"
4
4
  require "fileutils"
5
+ require "net/http"
6
+ require "json"
5
7
 
6
8
  module Personality
7
9
  module TTS
8
10
  VOICES_DIR = File.join(Dir.home, ".local", "share", "psn", "voices")
9
11
  DATA_DIR = File.join(Dir.home, ".local", "share", "personality", "data")
10
- DEFAULT_VOICE = "en_US-lessac-medium"
12
+ DEFAULT_VOICE = "bt7274"
11
13
 
12
14
  PID_FILE = File.join(DATA_DIR, "tts.pid")
13
15
  WAV_FILE = File.join(DATA_DIR, "tts_current.wav")
14
16
  NATURAL_STOP_FLAG = File.join(DATA_DIR, "tts_natural_stop")
15
17
 
18
+ # XTTS configuration
19
+ XTTS_HOST = ENV.fetch("XTTS_HOST", "junkpile")
20
+ XTTS_PORT = ENV.fetch("XTTS_PORT", "5002")
21
+ XTTS_URL = "http://#{XTTS_HOST}:#{XTTS_PORT}"
22
+
23
+ # Backend selection: "auto" selects based on language (pl=xtts, en=piper)
24
+ BACKEND = ENV.fetch("TTS_BACKEND", "auto") # "piper", "xtts", or "auto"
25
+
16
26
  PIPER_VOICES_BASE_URL = "https://huggingface.co/rhasspy/piper-voices/resolve/main"
17
27
 
28
+ # Audio padding (matches XTTS server)
29
+ PADDING_MS = 250
30
+
18
31
  class << self
19
32
  # --- Synthesis & Playback ---
20
33
 
21
- def speak(text, voice: nil)
34
+ def speak(text, voice: nil, language: nil)
22
35
  stop_current
23
36
  voice ||= active_voice
37
+ language ||= detect_language(text)
24
38
 
25
- model_path = find_voice(voice)
26
- return {error: "Voice not found: #{voice}"} unless model_path
39
+ FileUtils.mkdir_p(DATA_DIR)
27
40
 
28
- piper_bin = find_piper
29
- return {error: "piper not installed"} unless piper_bin
41
+ # Select backend: auto mode uses XTTS for Polish, piper for English
42
+ backend = select_backend(language)
30
43
 
31
- FileUtils.mkdir_p(DATA_DIR)
44
+ result = if backend == "xtts"
45
+ synthesize_xtts(text, voice: voice, language: language)
46
+ else
47
+ synthesize_piper(text, voice: voice)
48
+ end
32
49
 
33
- # Synthesize to WAV
34
- _, stderr, status = Open3.capture3(
35
- piper_bin, "--model", model_path, "--output_file", WAV_FILE,
36
- stdin_data: text
37
- )
38
- return {error: "piper failed: #{stderr}"} unless status.success?
50
+ return result if result[:error]
39
51
 
40
52
  # Play audio (macOS: afplay, Linux: aplay)
41
53
  player = player_command
@@ -44,11 +56,11 @@ module Personality
44
56
  pid = spawn(player, WAV_FILE, [:out, :err] => "/dev/null")
45
57
  save_pid(pid)
46
58
 
47
- {speaking: true, voice: voice, pid: pid}
59
+ {speaking: true, voice: voice, pid: pid, backend: backend}
48
60
  end
49
61
 
50
- def speak_and_wait(text, voice: nil)
51
- result = speak(text, voice: voice)
62
+ def speak_and_wait(text, voice: nil, language: nil)
63
+ result = speak(text, voice: voice, language: language)
52
64
  return result if result[:error]
53
65
 
54
66
  Process.wait(result[:pid])
@@ -95,21 +107,144 @@ module Personality
95
107
  # --- Voice Management ---
96
108
 
97
109
# Resolves a voice name for the configured backend.
# With the "xtts" backend the name itself is returned when that voice is
# available; otherwise the path of the matching Piper .onnx model is
# returned. Returns nil when the voice cannot be found.
def find_voice(name)
  # XTTS voices are speaker embeddings on junkpile
  return (xtts_voice_available?(name) ? name : nil) if BACKEND == "xtts"

  # Piper: check for .onnx file
  model_file = File.join(VOICES_DIR, "#{name}.onnx")
  model_file if File.exist?(model_file)
end
101
121
 
102
122
# Lists the voices of the configured backend: XTTS voices when BACKEND is
# "xtts", Piper voices otherwise.
def list_voices
  (BACKEND == "xtts") ? list_xtts_voices : list_piper_voices
end
129
+
130
# Downloads a Piper voice. With the "xtts" backend nothing is downloaded and
# an error hash is returned instead.
def download_voice(voice_name)
  return {error: "XTTS voices are pre-installed on #{XTTS_HOST}"} if BACKEND == "xtts"

  download_piper_voice(voice_name)
end
137
+
138
# Voice used when the caller does not name one: the PERSONALITY_VOICE
# environment variable if set, otherwise DEFAULT_VOICE.
def active_voice
  ENV["PERSONALITY_VOICE"] || DEFAULT_VOICE
end
141
+
142
# Reader for the configured TTS backend setting (the BACKEND constant).
def backend
  return BACKEND
end
145
+
146
+ private
147
+
148
+ # --- XTTS Backend ---
149
+
150
# Synthesizes +text+ by POSTing {text, language} as JSON to the XTTS server's
# /synthesize endpoint and writing the response body to WAV_FILE.
#
# Every network-level failure (refused connection, DNS failure, unreachable
# host, open/read timeout, dropped connection) is turned into an error hash,
# so callers never see an exception for an unavailable server.
#
# NOTE(review): +voice+ is accepted but not sent to the server - confirm
# whether the server supports selecting a speaker.
#
# @param text [String] text to speak
# @param voice [String] requested voice (currently unused)
# @param language [String] language code sent to the server
# @return [Hash] {synthesized: true} or {error: String}
def synthesize_xtts(text, voice:, language:)
  uri = URI.parse("#{XTTS_URL}/synthesize")

  request = Net::HTTP::Post.new(uri)
  request["Content-Type"] = "application/json"
  request.body = JSON.generate({text: text, language: language})

  response = Net::HTTP.start(uri.host, uri.port, read_timeout: 30) do |http|
    http.request(request)
  end

  unless response.is_a?(Net::HTTPSuccess)
    return {error: "XTTS synthesis failed: #{response.code} #{response.body}"}
  end

  File.binwrite(WAV_FILE, response.body)
  {synthesized: true}
rescue Errno::ECONNREFUSED
  {error: "XTTS server not running on #{XTTS_HOST}:#{XTTS_PORT}"}
rescue Net::ReadTimeout
  {error: "XTTS synthesis timed out"}
rescue SocketError, SystemCallError, Timeout::Error, IOError => e
  # Specific cases are handled above; this covers the remaining network errors.
  {error: "XTTS request failed: #{e.message}"}
end
172
+
173
# True only for voices the XTTS server is known to provide.
def xtts_voice_available?(name)
  # bt7274 is the only trained voice currently
  trained_voice = "bt7274"
  name == trained_voice
end
177
+
178
# Lists XTTS voices, which are reported only while the server's /health
# endpoint answers with a success status.
#
# Any network-level failure (refused connection, DNS failure, unreachable
# host, timeout) yields an empty list instead of an exception.
#
# @return [Array<Hash>] one entry for "bt7274" or an empty array
def list_xtts_voices
  uri = URI.parse("#{XTTS_URL}/health")
  response = Net::HTTP.get_response(uri)

  return [] unless response.is_a?(Net::HTTPSuccess)

  [{name: "bt7274", backend: "xtts", server: "#{XTTS_HOST}:#{XTTS_PORT}"}]
rescue SocketError, SystemCallError, Timeout::Error, IOError
  []
end
191
+
192
+ # --- Piper Backend ---
193
+
194
# Synthesizes +text+ with the local piper binary.
# piper writes to "<WAV_FILE>.raw"; add_silence_padding then produces
# WAV_FILE from it. Returns {synthesized: true} or an {error: ...} hash when
# the voice or binary is missing or piper exits unsuccessfully.
def synthesize_piper(text, voice:)
  model = find_voice(voice)
  return {error: "Voice not found: #{voice}"} unless model

  piper = find_piper
  return {error: "piper not installed"} unless piper

  unpadded = "#{WAV_FILE}.raw"
  argv = [piper, "--model", model, "--output_file", unpadded]
  _stdout, piper_errors, exit_status = Open3.capture3(*argv, stdin_data: text)

  if exit_status.success?
    # Add 250ms silence at start (matches XTTS padding)
    add_silence_padding(unpadded, WAV_FILE)
    {synthesized: true}
  else
    {error: "piper failed: #{piper_errors}"}
  end
end
214
+
215
# Produces +output_wav+ from +input_wav+, prepending PADDING_MS of silence
# when sox is available.
#
# The sox exit status is checked: the input file is removed only after a
# successful pad. When sox is missing or fails, the input is moved to
# +output_wav+ unpadded, so the caller always ends up with the freshly
# synthesized audio instead of a missing or stale file.
#
# @param input_wav [String] path of the unpadded audio (consumed)
# @param output_wav [String] path to write the final audio to
def add_silence_padding(input_wav, output_wav)
  sox_bin = `which sox 2>/dev/null`.strip
  padding_sec = PADDING_MS / 1000.0

  if !sox_bin.empty? && File.executable?(sox_bin)
    # Use sox to pad silence at start
    padded = system(sox_bin, input_wav, output_wav, "pad", padding_sec.to_s, "0",
      [:out, :err] => "/dev/null")
    if padded
      FileUtils.rm_f(input_wav)
      return
    end
  end

  # Fallback: just rename (no padding)
  FileUtils.mv(input_wav, output_wav)
end
229
+
230
# Locates the piper executable: ~/.local/bin/piper is tried first, then
# whatever `which piper` reports. Returns nil when neither is executable.
def find_piper
  home_install = File.join(Dir.home, ".local", "bin", "piper")
  on_path = `which piper 2>/dev/null`.strip

  [home_install, on_path].find do |candidate|
    !candidate.empty? && File.executable?(candidate)
  end
end
236
+
237
# Lists the Piper models (*.onnx) found in VOICES_DIR, sorted
# case-insensitively by name. Each entry carries the name, path, size in MB
# rounded to one decimal, and backend "piper". Returns [] when the directory
# does not exist.
def list_piper_voices
  return [] unless Dir.exist?(VOICES_DIR)

  bytes_per_mb = 1024.0 * 1024
  entries = Dir.glob(File.join(VOICES_DIR, "*.onnx")).map do |model_file|
    {
      name: File.basename(model_file, ".onnx"),
      path: model_file,
      size_mb: (File.size(model_file) / bytes_per_mb).round(1),
      backend: "piper"
    }
  end

  entries.sort_by { |entry| entry[:name].downcase }
end
111
246
 
112
- def download_voice(voice_name)
247
+ def download_piper_voice(voice_name)
113
248
  FileUtils.mkdir_p(VOICES_DIR)
114
249
 
115
250
  model_path = File.join(VOICES_DIR, "#{voice_name}.onnx")
@@ -120,17 +255,14 @@ module Personality
120
255
  parts = voice_name.split("-")
121
256
  return {error: "Invalid voice format"} if parts.length < 2
122
257
 
123
- lang = parts[0] # en_US
124
- lang_short = lang.split("_")[0] # en
125
- name = parts[1] # lessac
258
+ lang = parts[0]
259
+ lang_short = lang.split("_")[0]
260
+ name = parts[1]
126
261
  quality = parts[2] || "medium"
127
262
 
128
263
  model_url = "#{PIPER_VOICES_BASE_URL}/#{lang_short}/#{lang}/#{name}/#{quality}/#{voice_name}.onnx"
129
264
  config_url = "#{PIPER_VOICES_BASE_URL}/#{lang_short}/#{lang}/#{name}/#{quality}/#{voice_name}.onnx.json"
130
265
 
131
- require "net/http"
132
- require "uri"
133
-
134
266
  download_file(model_url, model_path)
135
267
  download_file(config_url, config_path)
136
268
 
@@ -142,18 +274,12 @@ module Personality
142
274
  {error: "Download failed: #{e.message}"}
143
275
  end
144
276
 
145
- def active_voice
146
- ENV.fetch("PERSONALITY_VOICE", DEFAULT_VOICE)
147
- end
148
-
149
- private
277
+ # --- Utilities ---
150
278
 
151
- def find_piper
152
- # Check common locations
153
- [
154
- File.join(Dir.home, ".local", "bin", "piper"),
155
- `which piper 2>/dev/null`.strip
156
- ].find { |p| !p.empty? && File.executable?(p) }
279
# Guesses the language of +text+: "pl" when it contains any Polish
# diacritic, "en" otherwise.
def detect_language(text)
  # Simple heuristic: check for Polish characters
  if text.match?(/[ąćęłńóśźżĄĆĘŁŃÓŚŹŻ]/)
    "pl"
  else
    "en"
  end
end
158
284
 
159
285
  def player_command
@@ -164,6 +290,13 @@ module Personality
164
290
  end
165
291
  end
166
292
 
293
# Chooses the synthesis backend for +language+. An explicit BACKEND setting
# is returned as-is; "auto" picks by language.
def select_backend(language)
  if BACKEND == "auto"
    # Polish uses XTTS (trained voice), English uses piper (fast)
    (language == "pl") ? "xtts" : "piper"
  else
    BACKEND
  end
end
299
+
167
300
# Writes +pid+, as text, to PID_FILE (replacing any previous content).
def save_pid(pid)
  File.open(PID_FILE, "w") { |pid_file| pid_file.write(pid.to_s) }
end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
module Personality
  # Gem version string.
  VERSION = "0.1.6"
end