edge-gemma-speak 0.1.0__tar.gz → 0.1.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {edge_gemma_speak-0.1.0/edge_gemma_speak.egg-info → edge_gemma_speak-0.1.1}/PKG-INFO +11 -1
- {edge_gemma_speak-0.1.0 → edge_gemma_speak-0.1.1}/README.md +10 -0
- {edge_gemma_speak-0.1.0 → edge_gemma_speak-0.1.1}/edge_gemma_speak/__init__.py +1 -1
- {edge_gemma_speak-0.1.0 → edge_gemma_speak-0.1.1}/edge_gemma_speak/cli.py +6 -0
- {edge_gemma_speak-0.1.0 → edge_gemma_speak-0.1.1}/edge_gemma_speak/voice_assistant.py +1 -1
- {edge_gemma_speak-0.1.0 → edge_gemma_speak-0.1.1/edge_gemma_speak.egg-info}/PKG-INFO +11 -1
- {edge_gemma_speak-0.1.0 → edge_gemma_speak-0.1.1}/pyproject.toml +1 -1
- {edge_gemma_speak-0.1.0 → edge_gemma_speak-0.1.1}/setup.py +1 -1
- {edge_gemma_speak-0.1.0 → edge_gemma_speak-0.1.1}/LICENSE +0 -0
- {edge_gemma_speak-0.1.0 → edge_gemma_speak-0.1.1}/MANIFEST.in +0 -0
- {edge_gemma_speak-0.1.0 → edge_gemma_speak-0.1.1}/edge_gemma_speak.egg-info/SOURCES.txt +0 -0
- {edge_gemma_speak-0.1.0 → edge_gemma_speak-0.1.1}/edge_gemma_speak.egg-info/dependency_links.txt +0 -0
- {edge_gemma_speak-0.1.0 → edge_gemma_speak-0.1.1}/edge_gemma_speak.egg-info/entry_points.txt +0 -0
- {edge_gemma_speak-0.1.0 → edge_gemma_speak-0.1.1}/edge_gemma_speak.egg-info/requires.txt +0 -0
- {edge_gemma_speak-0.1.0 → edge_gemma_speak-0.1.1}/edge_gemma_speak.egg-info/top_level.txt +0 -0
- {edge_gemma_speak-0.1.0 → edge_gemma_speak-0.1.1}/requirements.txt +0 -0
- {edge_gemma_speak-0.1.0 → edge_gemma_speak-0.1.1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: edge_gemma_speak
|
3
|
-
Version: 0.1.0
|
3
|
+
Version: 0.1.1
|
4
4
|
Summary: Edge-based voice assistant using Gemma LLM with STT and TTS capabilities
|
5
5
|
Home-page: https://github.com/yourusername/edge_gemma_speak
|
6
6
|
Author: MimicLab, Sogang University
|
@@ -34,6 +34,9 @@ Dynamic: license-file
|
|
34
34
|
Dynamic: requires-python
|
35
35
|
|
36
36
|
# 🎙️ Edge Gemma Speak
|
37
|
+
[](https://badge.fury.io/py/edge-gemma-speak)
|
38
|
+
[](https://github.com/MIMICLab/EdgeGemmaSpeak/blob/main/LICENSE)
|
39
|
+
[](https://pepy.tech/project/edge-gemma-speak)
|
37
40
|
|
38
41
|
Edge-based voice assistant using Gemma LLM with Speech-to-Text and Text-to-Speech capabilities
|
39
42
|
|
@@ -122,6 +125,12 @@ edge-gemma-speak --stt-beam-size 10
|
|
122
125
|
# Adjust VAD sensitivity (default: 0.5)
|
123
126
|
edge-gemma-speak --stt-vad-threshold 0.3
|
124
127
|
|
128
|
+
# Adjust minimum speech duration in ms (default: 250)
|
129
|
+
edge-gemma-speak --stt-vad-min-speech-duration 200
|
130
|
+
|
131
|
+
# Adjust minimum silence duration in ms (default: 1000)
|
132
|
+
edge-gemma-speak --stt-vad-min-silence-duration 800
|
133
|
+
|
125
134
|
# Change Whisper model size (tiny, base, small, medium, large)
|
126
135
|
edge-gemma-speak --stt-model small
|
127
136
|
```
|
@@ -296,6 +305,7 @@ For large LLM models:
|
|
296
305
|
- Check microphone permissions in system settings
|
297
306
|
- Close other audio applications
|
298
307
|
- Adjust VAD threshold: `--stt-vad-threshold 0.3`
|
308
|
+
- Reduce silence duration for faster response: `--stt-vad-min-silence-duration 500`
|
299
309
|
|
300
310
|
### Model File Not Found
|
301
311
|
|
@@ -1,4 +1,7 @@
|
|
1
1
|
# 🎙️ Edge Gemma Speak
|
2
|
+
[](https://badge.fury.io/py/edge-gemma-speak)
|
3
|
+
[](https://github.com/MIMICLab/EdgeGemmaSpeak/blob/main/LICENSE)
|
4
|
+
[](https://pepy.tech/project/edge-gemma-speak)
|
2
5
|
|
3
6
|
Edge-based voice assistant using Gemma LLM with Speech-to-Text and Text-to-Speech capabilities
|
4
7
|
|
@@ -87,6 +90,12 @@ edge-gemma-speak --stt-beam-size 10
|
|
87
90
|
# Adjust VAD sensitivity (default: 0.5)
|
88
91
|
edge-gemma-speak --stt-vad-threshold 0.3
|
89
92
|
|
93
|
+
# Adjust minimum speech duration in ms (default: 250)
|
94
|
+
edge-gemma-speak --stt-vad-min-speech-duration 200
|
95
|
+
|
96
|
+
# Adjust minimum silence duration in ms (default: 1000)
|
97
|
+
edge-gemma-speak --stt-vad-min-silence-duration 800
|
98
|
+
|
90
99
|
# Change Whisper model size (tiny, base, small, medium, large)
|
91
100
|
edge-gemma-speak --stt-model small
|
92
101
|
```
|
@@ -261,6 +270,7 @@ For large LLM models:
|
|
261
270
|
- Check microphone permissions in system settings
|
262
271
|
- Close other audio applications
|
263
272
|
- Adjust VAD threshold: `--stt-vad-threshold 0.3`
|
273
|
+
- Reduce silence duration for faster response: `--stt-vad-min-silence-duration 500`
|
264
274
|
|
265
275
|
### Model File Not Found
|
266
276
|
|
@@ -107,6 +107,10 @@ def main():
|
|
107
107
|
help="STT temperature for sampling (default: 0.0)")
|
108
108
|
parser.add_argument("--stt-vad-threshold", type=float, default=0.5,
|
109
109
|
help="STT VAD threshold (default: 0.5)")
|
110
|
+
parser.add_argument("--stt-vad-min-speech-duration", type=int, default=250,
|
111
|
+
help="Minimum speech duration in ms (default: 250)")
|
112
|
+
parser.add_argument("--stt-vad-min-silence-duration", type=int, default=1000,
|
113
|
+
help="Minimum silence duration in ms before cutting off (default: 1000)")
|
110
114
|
|
111
115
|
# LLM 파라미터
|
112
116
|
parser.add_argument("--llm-max-tokens", type=int, default=512,
|
@@ -268,6 +272,8 @@ def main():
|
|
268
272
|
stt_beam_size=args.stt_beam_size,
|
269
273
|
stt_temperature=args.stt_temperature,
|
270
274
|
stt_vad_threshold=args.stt_vad_threshold,
|
275
|
+
stt_vad_min_speech_duration_ms=args.stt_vad_min_speech_duration,
|
276
|
+
stt_vad_min_silence_duration_ms=args.stt_vad_min_silence_duration,
|
271
277
|
# TTS parameters
|
272
278
|
tts_voice=tts_voice,
|
273
279
|
# LLM parameters
|
@@ -72,7 +72,7 @@ class ModelConfig:
|
|
72
72
|
stt_temperature: float = 0.0
|
73
73
|
stt_vad_threshold: float = 0.5
|
74
74
|
stt_vad_min_speech_duration_ms: int = 250
|
75
|
-
stt_vad_min_silence_duration_ms: int = 2000
|
75
|
+
stt_vad_min_silence_duration_ms: int = 1000 # Reduced from 2000ms for faster response
|
76
76
|
|
77
77
|
# TTS detailed settings
|
78
78
|
tts_voice: str = "ko-KR-HyunsuMultilingualNeural"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: edge_gemma_speak
|
3
|
-
Version: 0.1.0
|
3
|
+
Version: 0.1.1
|
4
4
|
Summary: Edge-based voice assistant using Gemma LLM with STT and TTS capabilities
|
5
5
|
Home-page: https://github.com/yourusername/edge_gemma_speak
|
6
6
|
Author: MimicLab, Sogang University
|
@@ -34,6 +34,9 @@ Dynamic: license-file
|
|
34
34
|
Dynamic: requires-python
|
35
35
|
|
36
36
|
# 🎙️ Edge Gemma Speak
|
37
|
+
[](https://badge.fury.io/py/edge-gemma-speak)
|
38
|
+
[](https://github.com/MIMICLab/EdgeGemmaSpeak/blob/main/LICENSE)
|
39
|
+
[](https://pepy.tech/project/edge-gemma-speak)
|
37
40
|
|
38
41
|
Edge-based voice assistant using Gemma LLM with Speech-to-Text and Text-to-Speech capabilities
|
39
42
|
|
@@ -122,6 +125,12 @@ edge-gemma-speak --stt-beam-size 10
|
|
122
125
|
# Adjust VAD sensitivity (default: 0.5)
|
123
126
|
edge-gemma-speak --stt-vad-threshold 0.3
|
124
127
|
|
128
|
+
# Adjust minimum speech duration in ms (default: 250)
|
129
|
+
edge-gemma-speak --stt-vad-min-speech-duration 200
|
130
|
+
|
131
|
+
# Adjust minimum silence duration in ms (default: 1000)
|
132
|
+
edge-gemma-speak --stt-vad-min-silence-duration 800
|
133
|
+
|
125
134
|
# Change Whisper model size (tiny, base, small, medium, large)
|
126
135
|
edge-gemma-speak --stt-model small
|
127
136
|
```
|
@@ -296,6 +305,7 @@ For large LLM models:
|
|
296
305
|
- Check microphone permissions in system settings
|
297
306
|
- Close other audio applications
|
298
307
|
- Adjust VAD threshold: `--stt-vad-threshold 0.3`
|
308
|
+
- Reduce silence duration for faster response: `--stt-vad-min-silence-duration 500`
|
299
309
|
|
300
310
|
### Model File Not Found
|
301
311
|
|
@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
|
|
5
5
|
|
6
6
|
setup(
|
7
7
|
name="edge_gemma_speak",
|
8
|
-
version="0.1.0",
|
8
|
+
version="0.1.1",
|
9
9
|
author="MimicLab, Sogang University",
|
10
10
|
author_email="",
|
11
11
|
description="Edge-based voice assistant using Gemma LLM with STT and TTS capabilities",
|
File without changes
|
File without changes
|
File without changes
|
{edge_gemma_speak-0.1.0 → edge_gemma_speak-0.1.1}/edge_gemma_speak.egg-info/dependency_links.txt
RENAMED
File without changes
|
{edge_gemma_speak-0.1.0 → edge_gemma_speak-0.1.1}/edge_gemma_speak.egg-info/entry_points.txt
RENAMED
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|