edge-gemma-speak 0.1.0__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -12,7 +12,7 @@ from .voice_assistant import (
12
12
  main
13
13
  )
14
14
 
15
- __version__ = "0.1.0"
15
+ __version__ = "0.1.1"
16
16
  __author__ = "MimicLab, Sogang University"
17
17
 
18
18
  __all__ = [
edge_gemma_speak/cli.py CHANGED
@@ -107,6 +107,10 @@ def main():
107
107
  help="STT temperature for sampling (default: 0.0)")
108
108
  parser.add_argument("--stt-vad-threshold", type=float, default=0.5,
109
109
  help="STT VAD threshold (default: 0.5)")
110
+ parser.add_argument("--stt-vad-min-speech-duration", type=int, default=250,
111
+ help="Minimum speech duration in ms (default: 250)")
112
+ parser.add_argument("--stt-vad-min-silence-duration", type=int, default=1000,
113
+ help="Minimum silence duration in ms before cutting off (default: 1000)")
110
114
 
111
115
  # LLM 파라미터
112
116
  parser.add_argument("--llm-max-tokens", type=int, default=512,
@@ -268,6 +272,8 @@ def main():
268
272
  stt_beam_size=args.stt_beam_size,
269
273
  stt_temperature=args.stt_temperature,
270
274
  stt_vad_threshold=args.stt_vad_threshold,
275
+ stt_vad_min_speech_duration_ms=args.stt_vad_min_speech_duration,
276
+ stt_vad_min_silence_duration_ms=args.stt_vad_min_silence_duration,
271
277
  # TTS parameters
272
278
  tts_voice=tts_voice,
273
279
  # LLM parameters
@@ -72,7 +72,7 @@ class ModelConfig:
72
72
  stt_temperature: float = 0.0
73
73
  stt_vad_threshold: float = 0.5
74
74
  stt_vad_min_speech_duration_ms: int = 250
75
- stt_vad_min_silence_duration_ms: int = 2000
75
+ stt_vad_min_silence_duration_ms: int = 1000 # Reduced from 2000ms for faster response
76
76
 
77
77
  # TTS detailed settings
78
78
  tts_voice: str = "ko-KR-HyunsuMultilingualNeural"
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: edge_gemma_speak
3
- Version: 0.1.0
3
+ Version: 0.1.1
4
4
  Summary: Edge-based voice assistant using Gemma LLM with STT and TTS capabilities
5
5
  Home-page: https://github.com/yourusername/edge_gemma_speak
6
6
  Author: MimicLab, Sogang University
@@ -34,6 +34,9 @@ Dynamic: license-file
34
34
  Dynamic: requires-python
35
35
 
36
36
  # 🎙️ Edge Gemma Speak
37
+ [![PyPI Status](https://badge.fury.io/py/edge-gemma-speak.svg)](https://badge.fury.io/py/edge-gemma-speak)
38
+ [![license](https://img.shields.io/badge/License-MIT-blue.svg)](https://github.com/MIMICLab/EdgeGemmaSpeak/blob/main/LICENSE)
39
+ [![Downloads](https://pepy.tech/badge/edge-gemma-speak)](https://pepy.tech/project/edge-gemma-speak)
37
40
 
38
41
  Edge-based voice assistant using Gemma LLM with Speech-to-Text and Text-to-Speech capabilities
39
42
 
@@ -122,6 +125,12 @@ edge-gemma-speak --stt-beam-size 10
122
125
  # Adjust VAD sensitivity (default: 0.5)
123
126
  edge-gemma-speak --stt-vad-threshold 0.3
124
127
 
128
+ # Adjust minimum speech duration in ms (default: 250)
129
+ edge-gemma-speak --stt-vad-min-speech-duration 200
130
+
131
+ # Adjust minimum silence duration in ms (default: 1000)
132
+ edge-gemma-speak --stt-vad-min-silence-duration 800
133
+
125
134
  # Change Whisper model size (tiny, base, small, medium, large)
126
135
  edge-gemma-speak --stt-model small
127
136
  ```
@@ -296,6 +305,7 @@ For large LLM models:
296
305
  - Check microphone permissions in system settings
297
306
  - Close other audio applications
298
307
  - Adjust VAD threshold: `--stt-vad-threshold 0.3`
308
+ - Reduce silence duration for faster response: `--stt-vad-min-silence-duration 500`
299
309
 
300
310
  ### Model File Not Found
301
311
 
@@ -0,0 +1,9 @@
1
+ edge_gemma_speak/__init__.py,sha256=D2nUTwXZlFuNG0XVpfQSs3pdwmS54FjqrhjWBQzjSzE,445
2
+ edge_gemma_speak/cli.py,sha256=sn7nLQtJlNE7lL-qCGRBTRj0hOj6Ec0r6mzxUlE2aFI,13001
3
+ edge_gemma_speak/voice_assistant.py,sha256=1m_3k-d1LSY1qoSxTKD9IL0qbxkpfkLcW0M3asBAN5U,26535
4
+ edge_gemma_speak-0.1.1.dist-info/licenses/LICENSE,sha256=PYqztu9Bfz2u_QNatdRVQ-K2_ywpSgwFiGiKrqQdayQ,1083
5
+ edge_gemma_speak-0.1.1.dist-info/METADATA,sha256=sJKbbg8yuyJ1_kGfl0DhXKlvXn7vCAhYQg4WfYMWDDs,10230
6
+ edge_gemma_speak-0.1.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
7
+ edge_gemma_speak-0.1.1.dist-info/entry_points.txt,sha256=nJgB49bs1CJFwrZkK_x4MBBNo8l8Ihf3EBGpBvr94pU,63
8
+ edge_gemma_speak-0.1.1.dist-info/top_level.txt,sha256=JUyz2j-g_v8aA-lWzxlBHaMpzZTvMR9MJH-h1ye3wa8,17
9
+ edge_gemma_speak-0.1.1.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- edge_gemma_speak/__init__.py,sha256=HPS-2No6K2IyCSnK0wLUci0mN-ZSlmgSjYw9nGLPUfA,445
2
- edge_gemma_speak/cli.py,sha256=99N4k-U9ORpcXtKOPYd3OI4SCBmZK-ayM39hvXM_2zY,12518
3
- edge_gemma_speak/voice_assistant.py,sha256=xR8Mn75EOZuFeOn9XHruaIIAo1qu5jOj9-NYEuX8VSQ,26492
4
- edge_gemma_speak-0.1.0.dist-info/licenses/LICENSE,sha256=PYqztu9Bfz2u_QNatdRVQ-K2_ywpSgwFiGiKrqQdayQ,1083
5
- edge_gemma_speak-0.1.0.dist-info/METADATA,sha256=Z_X-P8mE7VbR1j4yU12tsXnZ2-RFY76TCoTlMXn96-A,9597
6
- edge_gemma_speak-0.1.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
7
- edge_gemma_speak-0.1.0.dist-info/entry_points.txt,sha256=nJgB49bs1CJFwrZkK_x4MBBNo8l8Ihf3EBGpBvr94pU,63
8
- edge_gemma_speak-0.1.0.dist-info/top_level.txt,sha256=JUyz2j-g_v8aA-lWzxlBHaMpzZTvMR9MJH-h1ye3wa8,17
9
- edge_gemma_speak-0.1.0.dist-info/RECORD,,