myagent-ai 1.15.93 → 1.15.95

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "myagent-ai",
3
- "version": "1.15.93",
3
+ "version": "1.15.95",
4
4
  "description": "本地桌面端执行型AI助手 - Open Interpreter 风格 | Local Desktop Execution-Oriented AI Assistant",
5
5
  "main": "main.py",
6
6
  "bin": {
package/requirements.txt CHANGED
@@ -52,6 +52,7 @@ edge-tts>=6.1.0
52
52
  # 语音识别 (本地 STT,默认启用)
53
53
  # ============================================================
54
54
  faster-whisper>=1.0.0
55
+ pydub>=0.25.1
55
56
 
56
57
  # ============================================================
57
58
  # Anthropic Claude (可选)
@@ -4433,13 +4433,14 @@ var VoiceInput = {
4433
4433
  }
4434
4434
  },
4435
4435
 
4436
- /** Start recording(使用 MediaRecorder) */
4436
+ /** Start recording(使用 AudioContext 直接录制 WAV,无需后端 ffmpeg) */
4437
4437
  startRecording: async function() {
4438
4438
  if (this.isRecording) return;
4439
4439
 
4440
4440
  this.isRecording = true;
4441
- this.audioChunks = [];
4442
4441
  this.rawText = '';
4442
+ // PCM 样本缓冲区(Float32)
4443
+ this._pcmSamples = [];
4443
4444
 
4444
4445
  var btn = document.getElementById('voiceRecordBtn');
4445
4446
  if (btn) btn.classList.add('recording');
@@ -4468,41 +4469,24 @@ var VoiceInput = {
4468
4469
  }
4469
4470
  });
4470
4471
 
4471
- // 创建 MediaRecorder(优先使用 WAV 格式,回退到 WEBM)
4472
- var mimeType = 'audio/webm;codecs=opus';
4473
- if (typeof MediaRecorder !== 'undefined' && MediaRecorder.isTypeSupported) {
4474
- if (MediaRecorder.isTypeSupported('audio/webm;codecs=opus')) {
4475
- mimeType = 'audio/webm;codecs=opus';
4476
- } else if (MediaRecorder.isTypeSupported('audio/webm')) {
4477
- mimeType = 'audio/webm';
4478
- } else if (MediaRecorder.isTypeSupported('audio/ogg;codecs=opus')) {
4479
- mimeType = 'audio/ogg;codecs=opus';
4480
- }
4481
- }
4482
-
4483
- this.mediaRecorder = new MediaRecorder(this._audioStream, { mimeType: mimeType });
4472
+ // [v1.15.94] 使用 AudioContext + ScriptProcessorNode 直接录制 16kHz 单声道 PCM
4473
+ // 最终编码为 WAV,后端无需 pydub/ffmpeg
4474
+ var AudioCtx = window.AudioContext || window.webkitAudioContext;
4475
+ this._audioContext = new AudioCtx({ sampleRate: 16000 });
4476
+ var source = this._audioContext.createMediaStreamSource(this._audioStream);
4477
+ // bufferSize 4096 @ 16kHz ≈ 256ms per chunk
4478
+ this._scriptProcessor = this._audioContext.createScriptProcessor(4096, 1, 1);
4484
4479
  var self = this;
4485
4480
 
4486
- this.mediaRecorder.ondataavailable = function(e) {
4487
- if (e.data && e.data.size > 0) {
4488
- self.audioChunks.push(e.data);
4489
- }
4490
- };
4491
-
4492
- this.mediaRecorder.onstop = function() {
4493
- self._processAudio();
4481
+ this._scriptProcessor.onaudioprocess = function(e) {
4482
+ if (!self.isRecording) return;
4483
+ var inputData = e.inputBuffer.getChannelData(0);
4484
+ self._pcmSamples.push(new Float32Array(inputData));
4494
4485
  };
4495
4486
 
4496
- this.mediaRecorder.onerror = function(e) {
4497
- console.error('MediaRecorder error:', e.error);
4498
- self.isRecording = false;
4499
- if (btn) btn.classList.remove('recording');
4500
- self._showStatus('录音出错,请重试', 'var(--danger)');
4501
- self._cleanupStream();
4502
- };
4487
+ source.connect(this._scriptProcessor);
4488
+ this._scriptProcessor.connect(this._audioContext.destination);
4503
4489
 
4504
- // 开始录音(每100ms收集一次数据)
4505
- this.mediaRecorder.start(100);
4506
4490
  this._showStatus('正在录音...', 'var(--text3)');
4507
4491
  this._recordingStartTime = Date.now();
4508
4492
 
@@ -4516,7 +4500,7 @@ var VoiceInput = {
4516
4500
 
4517
4501
  /** Stop recording and process audio */
4518
4502
  stopRecording: function() {
4519
- if (!this.isRecording || !this.mediaRecorder) return;
4503
+ if (!this.isRecording) return;
4520
4504
 
4521
4505
  var btn = document.getElementById('voiceRecordBtn');
4522
4506
  if (btn) btn.classList.remove('recording');
@@ -4526,32 +4510,57 @@ var VoiceInput = {
4526
4510
  if (duration < 500) {
4527
4511
  this._showStatus('录音时间太短,请按住麦克风说话', 'var(--text3)');
4528
4512
  this.isRecording = false;
4529
- try { this.mediaRecorder.stop(); } catch (e) {}
4530
4513
  this._cleanupStream();
4531
4514
  return;
4532
4515
  }
4533
4516
 
4534
- this._showStatus('正在识别...', 'var(--text3)');
4535
4517
  this.isRecording = false;
4536
-
4537
- try {
4538
- this.mediaRecorder.stop();
4539
- } catch (e) {}
4518
+ this._cleanupStream();
4519
+ this._showStatus('正在识别...', 'var(--text3)');
4520
+ this._processAudio();
4540
4521
  },
4541
4522
 
4542
- /** 清理音频流 */
4543
- _cleanupStream: function() {
4544
- if (this._audioStream) {
4545
- this._audioStream.getTracks().forEach(function(t) { t.stop(); });
4546
- this._audioStream = null;
4523
+ /** 清理音频流 */\n _cleanupStream: function() {\n // 断开 ScriptProcessor\n if (this._scriptProcessor) {\n try { this._scriptProcessor.disconnect(); } catch(e) {}\n this._scriptProcessor = null;\n }\n // 关闭 AudioContext\n if (this._audioContext && this._audioContext.state !== 'closed') {\n try { this._audioContext.close(); } catch(e) {}\n this._audioContext = null;\n }\n // 停止麦克风流\n if (this._audioStream) {\n this._audioStream.getTracks().forEach(function(t) { t.stop(); });\n this._audioStream = null;\n }\n },
4524
+
4525
+ /** PCM Float32 样本编码为 WAV Blob(16kHz, 单声道, 16bit) */
4526
+ _encodeWav: function(samples, sampleRate) {
4527
+ var numSamples = 0;
4528
+ for (var i = 0; i < samples.length; i++) numSamples += samples[i].length;
4529
+ var buffer = new ArrayBuffer(44 + numSamples * 2);
4530
+ var view = new DataView(buffer);
4531
+ // RIFF header
4532
+ var writeStr = function(offset, str) {
4533
+ for (var i = 0; i < str.length; i++) view.setUint8(offset + i, str.charCodeAt(i));
4534
+ };
4535
+ writeStr(0, 'RIFF');
4536
+ view.setUint32(4, 36 + numSamples * 2, true);
4537
+ writeStr(8, 'WAVE');
4538
+ writeStr(12, 'fmt ');
4539
+ view.setUint32(16, 16, true); // PCM subchunk size
4540
+ view.setUint16(20, 1, true); // PCM format
4541
+ view.setUint16(22, 1, true); // mono
4542
+ view.setUint32(24, sampleRate, true);
4543
+ view.setUint32(28, sampleRate * 2, true); // byte rate
4544
+ view.setUint16(32, 2, true); // block align
4545
+ view.setUint16(34, 16, true); // bits per sample
4546
+ writeStr(36, 'data');
4547
+ view.setUint32(40, numSamples * 2, true);
4548
+ // 写入 PCM 数据(Float32 → Int16)
4549
+ var offset = 44;
4550
+ for (var i = 0; i < samples.length; i++) {
4551
+ for (var j = 0; j < samples[i].length; j++) {
4552
+ var s = Math.max(-1, Math.min(1, samples[i][j]));
4553
+ view.setInt16(offset, s < 0 ? s * 0x8000 : s * 0x7FFF, true);
4554
+ offset += 2;
4555
+ }
4547
4556
  }
4557
+ return new Blob([buffer], { type: 'audio/wav' });
4548
4558
  },
4549
4559
 
4550
- /** 处理录音数据:发送到后端 STT */
4560
+ /** 处理录音数据:编码为 WAV 并发送到后端 STT */
4551
4561
  _processAudio: async function() {
4552
- if (this.audioChunks.length === 0) {
4562
+ if (!this._pcmSamples || this._pcmSamples.length === 0) {
4553
4563
  this._showStatus('未检测到语音,请重试', 'var(--text3)');
4554
- this._cleanupStream();
4555
4564
  return;
4556
4565
  }
4557
4566
 
@@ -4569,12 +4578,14 @@ var VoiceInput = {
4569
4578
  if (previewSend) previewSend.disabled = true;
4570
4579
 
4571
4580
  try {
4572
- var audioBlob = new Blob(this.audioChunks, { type: this.mediaRecorder ? this.mediaRecorder.mimeType : 'audio/webm' });
4581
+ // [v1.15.94] 在浏览器端直接编码为 WAV,后端无需 pydub/ffmpeg
4582
+ var audioBlob = this._encodeWav(this._pcmSamples, 16000);
4583
+ this._pcmSamples = []; // 释放内存
4573
4584
 
4574
- // 发送音频到后端 STT 端点
4585
+ // 发送 WAV 音频到后端 STT 端点
4575
4586
  var formData = new FormData();
4576
- formData.append('audio', audioBlob, 'recording.webm');
4577
- formData.append('format', 'webm');
4587
+ formData.append('audio', audioBlob, 'recording.wav');
4588
+ formData.append('format', 'wav');
4578
4589
 
4579
4590
  var resp = await fetch('/api/voice-stt', {
4580
4591
  method: 'POST',
@@ -4616,9 +4627,6 @@ var VoiceInput = {
4616
4627
  /** Cancel recording without processing */
4617
4628
  cancelRecording: function() {
4618
4629
  this.isRecording = false;
4619
- if (this.mediaRecorder && this.mediaRecorder.state !== 'inactive') {
4620
- try { this.mediaRecorder.abort(); } catch(e) {}
4621
- }
4622
4630
  this._cleanupStream();
4623
4631
  var btn = document.getElementById('voiceRecordBtn');
4624
4632
  if (btn) btn.classList.remove('recording');
@@ -4627,7 +4635,7 @@ var VoiceInput = {
4627
4635
  statusEl.textContent = '';
4628
4636
  }
4629
4637
  this.rawText = '';
4630
- this.audioChunks = [];
4638
+ this._pcmSamples = [];
4631
4639
  },
4632
4640
 
4633
4641
  /** Cancel voice preview and return to voice input mode */