PayPerTranscript 0.2.9__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/PKG-INFO +1 -1
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/PayPerTranscript.egg-info/PKG-INFO +1 -1
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/__init__.py +1 -1
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/core/hotkey.py +21 -1
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/core/text_inserter.py +23 -8
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/pipeline/transcription.py +45 -3
- paypertranscript-0.3.0/paypertranscript/providers/groq_provider.py +273 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/app.py +72 -6
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/overlay.py +43 -1
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/tray.py +15 -9
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/pyproject.toml +1 -1
- paypertranscript-0.2.9/paypertranscript/providers/groq_provider.py +0 -193
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/LICENSE +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/PayPerTranscript.egg-info/SOURCES.txt +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/PayPerTranscript.egg-info/dependency_links.txt +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/PayPerTranscript.egg-info/entry_points.txt +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/PayPerTranscript.egg-info/requires.txt +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/PayPerTranscript.egg-info/top_level.txt +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/README.md +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/__main__.py +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/assets/icons/app.ico +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/assets/icons/app.png +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/assets/icons/app_big.png +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/assets/icons/arrow_down.svg +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/assets/icons/tray.png +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/assets/icons/tray_green.png +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/assets/icons/tray_orange.png +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/assets/sounds/start.wav +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/assets/sounds/stop.wav +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/assets/styles/dark.qss +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/core/__init__.py +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/core/audio_manager.py +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/core/config.py +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/core/cost_tracker.py +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/core/logging.py +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/core/paths.py +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/core/recorder.py +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/core/session_logger.py +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/core/updater.py +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/core/window_detector.py +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/pipeline/__init__.py +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/providers/__init__.py +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/providers/base.py +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/__init__.py +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/animated.py +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/constants.py +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/main_window.py +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/pages/__init__.py +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/pages/home_page.py +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/pages/settings_page.py +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/pages/statistics_page.py +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/pages/window_mapping_page.py +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/pages/word_list_page.py +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/setup_wizard.py +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/sidebar.py +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/widgets.py +0 -0
- {paypertranscript-0.2.9 → paypertranscript-0.3.0}/setup.cfg +0 -0
|
@@ -54,6 +54,11 @@ _MODIFIER_GROUPS: dict[str, set[keyboard.Key]] = {
|
|
|
54
54
|
"cmd": {keyboard.Key.cmd, keyboard.Key.cmd_l, keyboard.Key.cmd_r},
|
|
55
55
|
}
|
|
56
56
|
|
|
57
|
+
# Flat set of every modifier key from all groups, used by the exact-match
# check to detect modifiers that are pressed but not part of the hotkey.
# Built with a single union so no loop variable leaks into the module namespace.
_ALL_MODIFIER_KEYS: set[keyboard.Key] = set().union(*_MODIFIER_GROUPS.values())
|
|
61
|
+
|
|
57
62
|
# Alt-Keys fuer Menu-Bar-Workaround (Windows aktiviert Menueleiste bei bare Alt-Release)
|
|
58
63
|
_ALT_KEYS: set[keyboard.Key] = {keyboard.Key.alt_l, keyboard.Key.alt_r}
|
|
59
64
|
|
|
@@ -149,18 +154,33 @@ class HotkeyListener:
|
|
|
149
154
|
target_keys: list[keyboard.Key | keyboard.KeyCode],
|
|
150
155
|
modifier_groups: list[set[keyboard.Key]],
|
|
151
156
|
) -> bool:
|
|
152
|
-
"""Prüft ob eine Tastenkombination aktuell gedrückt ist.
|
|
157
|
+
"""Prüft ob eine Tastenkombination aktuell gedrückt ist.
|
|
158
|
+
|
|
159
|
+
Exaktes Modifier-Matching: es muessen genau die konfigurierten Modifier
|
|
160
|
+
gedrueckt sein, keine zusaetzlichen. Damit wird verhindert, dass z.B.
|
|
161
|
+
Ctrl+Win auch durch Ctrl+Shift+Alt+F9 ausgeloest wird.
|
|
162
|
+
"""
|
|
153
163
|
if not target_keys:
|
|
154
164
|
return False
|
|
155
165
|
|
|
166
|
+
# Sammle welche Modifier-Gruppen zum Hotkey gehoeren
|
|
167
|
+
required_modifier_keys: set[keyboard.Key] = set()
|
|
168
|
+
|
|
156
169
|
for i, target_key in enumerate(target_keys):
|
|
157
170
|
# Für Modifier: prüfe ob *irgendein* Key aus der Gruppe gedrückt ist
|
|
158
171
|
if i < len(modifier_groups) and modifier_groups[i]:
|
|
159
172
|
if not (modifier_groups[i] & self._pressed_keys):
|
|
160
173
|
return False
|
|
174
|
+
required_modifier_keys |= modifier_groups[i]
|
|
161
175
|
else:
|
|
162
176
|
if target_key not in self._pressed_keys:
|
|
163
177
|
return False
|
|
178
|
+
|
|
179
|
+
# Pruefe ob Extra-Modifier gedrueckt sind, die nicht zum Hotkey gehoeren
|
|
180
|
+
extra_modifiers = (self._pressed_keys & _ALL_MODIFIER_KEYS) - required_modifier_keys
|
|
181
|
+
if extra_modifiers:
|
|
182
|
+
return False
|
|
183
|
+
|
|
164
184
|
return True
|
|
165
185
|
|
|
166
186
|
def _combo_uses_alt(self, target_keys: list[keyboard.Key | keyboard.KeyCode]) -> bool:
|
|
@@ -19,6 +19,27 @@ log = get_logger("core.text_inserter")
|
|
|
19
19
|
pyautogui.FAILSAFE = False
|
|
20
20
|
pyautogui.PAUSE = 0
|
|
21
21
|
|
|
22
|
+
# Clipboard-Wiederherstellung: Retry-Konfiguration
|
|
23
|
+
_CLIPBOARD_RESTORE_RETRIES = 3
|
|
24
|
+
_CLIPBOARD_RESTORE_DELAY = 0.05 # 50ms zwischen Versuchen
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _restore_clipboard(content: str) -> None:
    """Write ``content`` back to the clipboard, retrying on failure.

    Other applications (clipboard managers, password managers) may hold the
    clipboard briefly, so the copy is attempted up to
    ``_CLIPBOARD_RESTORE_RETRIES`` times with ``_CLIPBOARD_RESTORE_DELAY``
    seconds between attempts. Failure is best-effort: after the final attempt
    a warning is logged and nothing is raised.

    Args:
        content: Text to place on the clipboard.
    """
    attempts_left = _CLIPBOARD_RESTORE_RETRIES
    while attempts_left > 0:
        attempts_left -= 1
        try:
            pyperclip.copy(content)
        except Exception:
            if attempts_left:
                # More attempts remain: wait briefly, then try again.
                time.sleep(_CLIPBOARD_RESTORE_DELAY)
                continue
            log.warning("Zwischenablage konnte nicht wiederhergestellt werden (nach %d Versuchen)", _CLIPBOARD_RESTORE_RETRIES)
        else:
            return
|
|
42
|
+
|
|
22
43
|
|
|
23
44
|
def insert_text(text: str) -> None:
|
|
24
45
|
"""Fügt Text an der aktuellen Cursor-Position ein.
|
|
@@ -62,10 +83,7 @@ def insert_text(text: str) -> None:
|
|
|
62
83
|
|
|
63
84
|
finally:
|
|
64
85
|
# 5. Alte Zwischenablage wiederherstellen
|
|
65
|
-
|
|
66
|
-
pyperclip.copy(old_clipboard)
|
|
67
|
-
except Exception:
|
|
68
|
-
log.debug("Zwischenablage konnte nicht wiederhergestellt werden")
|
|
86
|
+
_restore_clipboard(old_clipboard)
|
|
69
87
|
|
|
70
88
|
|
|
71
89
|
# Intervall (Sekunden) zwischen Chunk-Pastes bei Streaming-Typing
|
|
@@ -125,7 +143,4 @@ def insert_text_streaming(chunks: Iterator[str]) -> None:
|
|
|
125
143
|
log.error("Auch Fallback-Paste fehlgeschlagen")
|
|
126
144
|
|
|
127
145
|
finally:
|
|
128
|
-
|
|
129
|
-
pyperclip.copy(old_clipboard)
|
|
130
|
-
except Exception:
|
|
131
|
-
log.debug("Zwischenablage konnte nicht wiederhergestellt werden")
|
|
146
|
+
_restore_clipboard(old_clipboard)
|
{paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/pipeline/transcription.py
RENAMED
|
@@ -26,9 +26,38 @@ STATUS_STT_DONE = "stt_done"
|
|
|
26
26
|
STATUS_LLM_START = "llm_start"
|
|
27
27
|
STATUS_DONE = "done"
|
|
28
28
|
STATUS_ERROR = "error"
|
|
29
|
+
STATUS_LLM_FALLBACK = "llm_fallback"
|
|
29
30
|
|
|
30
31
|
log = get_logger("pipeline.transcription")
|
|
31
32
|
|
|
33
|
+
# Hallucination filter: Whisper hallucinates on short recordings without speech
_HALLUCINATION_PATTERNS = [
    "copyright",
    "untertitel",
    "subtitles by",
    "thanks for watching",
    "thank you for watching",
    "sous-titres",
    "amara.org",
]
_HALLUCINATION_MAX_DURATION = 5.0


def _is_hallucination(text: str, audio_duration: float) -> bool:
    """Decide whether an STT result looks like a Whisper hallucination.

    Only recordings shorter than ``_HALLUCINATION_MAX_DURATION`` seconds are
    examined. For those, the text is flagged when its lowercased form contains
    any entry of ``_HALLUCINATION_PATTERNS`` as a substring (for example
    "Copyright Australian Broadcasting Corporation").

    Args:
        text: STT result.
        audio_duration: Audio duration in seconds.

    Returns:
        True if the text was recognised as a hallucination, otherwise False.
    """
    if audio_duration >= _HALLUCINATION_MAX_DURATION:
        return False

    lowered = text.lower()
    for marker in _HALLUCINATION_PATTERNS:
        if marker in lowered:
            return True
    return False
|
|
59
|
+
|
|
60
|
+
|
|
32
61
|
# Maximale Prompt-Laenge fuer Whisper (224 Tokens).
|
|
33
62
|
# Konservative Schaetzung: ~4 Zeichen pro Token fuer gemischten DE/EN Text.
|
|
34
63
|
_MAX_PROMPT_CHARS = 896
|
|
@@ -86,6 +115,7 @@ class TranscriptionPipeline:
|
|
|
86
115
|
self._config = config
|
|
87
116
|
self._session_logger = session_logger
|
|
88
117
|
self.last_transcription: str | None = None
|
|
118
|
+
self.last_wav_path: Path | None = None
|
|
89
119
|
log.info(
|
|
90
120
|
"TranscriptionPipeline initialisiert (LLM: %s, Tracking: %s)",
|
|
91
121
|
"aktiv" if llm_provider else "deaktiviert",
|
|
@@ -219,6 +249,8 @@ class TranscriptionPipeline:
|
|
|
219
249
|
except Exception:
|
|
220
250
|
pass
|
|
221
251
|
|
|
252
|
+
self.last_wav_path = wav_path
|
|
253
|
+
|
|
222
254
|
try:
|
|
223
255
|
# Audio-Dauer: entweder uebergeben oder aus WAV-Datei berechnen
|
|
224
256
|
if audio_duration is None:
|
|
@@ -241,7 +273,16 @@ class TranscriptionPipeline:
|
|
|
241
273
|
|
|
242
274
|
if not text:
|
|
243
275
|
log.info("Pipeline: STT lieferte leeren Text - uebersprungen")
|
|
244
|
-
_notify(
|
|
276
|
+
_notify(STATUS_ERROR, "Kein Text erkannt")
|
|
277
|
+
return
|
|
278
|
+
|
|
279
|
+
if _is_hallucination(text, audio_duration):
|
|
280
|
+
log.info(
|
|
281
|
+
"Pipeline: Halluzination erkannt (%.1fs, '%s') - uebersprungen",
|
|
282
|
+
audio_duration,
|
|
283
|
+
text[:80],
|
|
284
|
+
)
|
|
285
|
+
_notify(STATUS_ERROR, "Keine Sprache erkannt")
|
|
245
286
|
return
|
|
246
287
|
|
|
247
288
|
# LLM-Formatierung (falls Window-Mapping existiert)
|
|
@@ -262,7 +303,7 @@ class TranscriptionPipeline:
|
|
|
262
303
|
except Exception as e:
|
|
263
304
|
insert_ok = False
|
|
264
305
|
log.error("Pipeline: Text-Einfuegung fehlgeschlagen: %s", e)
|
|
265
|
-
_notify(STATUS_ERROR, "Text
|
|
306
|
+
_notify(STATUS_ERROR, f"Text-Einfuegung fehlgeschlagen: {e}")
|
|
266
307
|
|
|
267
308
|
def _do_insert_stream(chunks_iter: object) -> None:
|
|
268
309
|
nonlocal insert_ok
|
|
@@ -271,7 +312,7 @@ class TranscriptionPipeline:
|
|
|
271
312
|
except Exception as e:
|
|
272
313
|
insert_ok = False
|
|
273
314
|
log.error("Pipeline: Streaming-Einfuegung fehlgeschlagen: %s", e)
|
|
274
|
-
_notify(STATUS_ERROR, "Text
|
|
315
|
+
_notify(STATUS_ERROR, f"Text-Einfuegung fehlgeschlagen: {e}")
|
|
275
316
|
|
|
276
317
|
if system_prompt and self._llm:
|
|
277
318
|
_notify(STATUS_LLM_START)
|
|
@@ -299,6 +340,7 @@ class TranscriptionPipeline:
|
|
|
299
340
|
|
|
300
341
|
except ProviderError as e:
|
|
301
342
|
log.warning("Pipeline: LLM-Fehler - Fallback auf Rohtext: %s", e)
|
|
343
|
+
_notify(STATUS_LLM_FALLBACK, str(e))
|
|
302
344
|
_do_insert(text)
|
|
303
345
|
else:
|
|
304
346
|
# Kein Mapping oder kein LLM-Provider -> Rohtext direkt einfuegen
|
|
@@ -0,0 +1,273 @@
|
|
|
1
|
+
"""GroqCloud API-Provider für PayPerTranscript.
|
|
2
|
+
|
|
3
|
+
Implementiert STT (Whisper) und LLM-Formatierung über die GroqCloud API.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import time
|
|
7
|
+
from collections.abc import Iterator
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
import groq
|
|
11
|
+
|
|
12
|
+
from paypertranscript.core.logging import get_logger
|
|
13
|
+
from paypertranscript.providers.base import AbstractLLMProvider, AbstractSTTProvider, ProviderError
|
|
14
|
+
|
|
15
|
+
log = get_logger("providers.groq")
|
|
16
|
+
|
|
17
|
+
# Retry configuration for transient API errors
_MAX_RETRIES = 3
# Base delay in seconds; the wait doubles after each failed attempt. A wait
# only happens when another attempt follows, so with 3 attempts the actual
# delays are 1s and 2s.
_RETRY_BASE_DELAY = 1.0
_RETRYABLE_ERRORS = (groq.RateLimitError, groq.APITimeoutError, groq.APIConnectionError)

# Minimum WAV file size (44 bytes = WAV header without audio data)
_MIN_WAV_SIZE = 44
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class GroqSTTProvider(AbstractSTTProvider):
    """Speech-to-text provider backed by the GroqCloud Whisper API.

    Uses ``whisper-large-v3-turbo`` unless another model is given. The Groq
    client is created once in the constructor and reused for every request.
    """

    def __init__(
        self,
        api_key: str | None = None,
        model: str = "whisper-large-v3-turbo",
    ) -> None:
        """Create the provider and its Groq client.

        Args:
            api_key: API key handed to ``groq.Groq``; may be None.
            model: Name of the transcription model to request.

        Raises:
            ProviderError: If the Groq client cannot be constructed.
        """
        self._model = model
        try:
            self._client = groq.Groq(api_key=api_key)
        except groq.GroqError as e:
            raise ProviderError(f"Groq-Client konnte nicht erstellt werden: {e}") from e
        log.info("GroqSTTProvider initialisiert (Modell: %s)", self._model)

    def _request_transcription(self, audio_path: Path, language: str, prompt: str):
        """Send one transcription request; the file is reopened on every call."""
        with open(audio_path, "rb") as wav_handle:
            return self._client.audio.transcriptions.create(
                model=self._model,
                file=wav_handle,
                language=language,
                prompt=prompt,
                response_format="text",
                temperature=0.0,
            )

    def transcribe(self, audio_path: Path, language: str, prompt: str = "") -> str:
        """Transcribe a WAV file through the GroqCloud Whisper API.

        Retryable errors (rate limit, timeout, connection) are retried up to
        ``_MAX_RETRIES`` times with exponentially growing delays.

        Args:
            audio_path: Path of the WAV file to transcribe.
            language: Language code passed to the API.
            prompt: Optional prompt passed to the API.

        Returns:
            The transcribed text with surrounding whitespace stripped.

        Raises:
            ProviderError: If the file is missing or not larger than a bare
                WAV header, the API key is rejected, a non-retryable API error
                occurs, or all retries are exhausted.
        """
        if not audio_path.exists():
            raise ProviderError(f"Audio-Datei nicht gefunden: {audio_path}")

        # V05: reject files that hold nothing beyond a 44-byte WAV header
        size_in_bytes = audio_path.stat().st_size
        if size_in_bytes <= _MIN_WAV_SIZE:
            raise ProviderError(
                f"Audio-Datei ist leer oder beschädigt ({size_in_bytes} Bytes)"
            )

        log.info(
            "STT-Anfrage: %s (Sprache: %s, Modell: %s)",
            audio_path.name,
            language,
            self._model,
        )
        if prompt:
            log.info("STT-Prompt: %s", prompt)

        # V01: retry loop for transient errors; success returns from inside it
        failure: Exception | None = None
        for attempt in range(1, _MAX_RETRIES + 1):
            try:
                transcription = self._request_transcription(audio_path, language, prompt)
            except groq.AuthenticationError as e:
                raise ProviderError(f"API-Key ungültig: {e}") from e
            except _RETRYABLE_ERRORS as e:
                failure = e
                if attempt >= _MAX_RETRIES:
                    log.error("STT: Alle %d Versuche fehlgeschlagen", _MAX_RETRIES)
                    continue
                delay = _RETRY_BASE_DELAY * (2 ** (attempt - 1))
                log.warning(
                    "STT-Versuch %d/%d fehlgeschlagen: %s - Retry in %.1fs",
                    attempt, _MAX_RETRIES, e, delay,
                )
                time.sleep(delay)
            except groq.APIError as e:
                raise ProviderError(f"GroqCloud API-Fehler: {e}") from e
            else:
                # response_format="text" yields a plain string; fall back to
                # the ``.text`` attribute for any other response object.
                if isinstance(transcription, str):
                    text = transcription.strip()
                else:
                    text = transcription.text.strip()
                log.info("STT-Ergebnis: %d Zeichen", len(text))
                return text

        # All retries exhausted: translate the last error into a ProviderError
        if isinstance(failure, groq.RateLimitError):
            raise ProviderError(f"Rate Limit erreicht: {failure}") from failure
        if isinstance(failure, groq.APITimeoutError):
            raise ProviderError(f"GroqCloud Timeout: {failure}") from failure
        raise ProviderError(f"Keine Verbindung zu GroqCloud: {failure}") from failure
|
|
111
|
+
|
|
112
|
+
|
|
113
|
+
class GroqLLMProvider(AbstractLLMProvider):
    """GroqCloud LLM provider used for text formatting.

    Uses ``openai/gpt-oss-20b`` unless another model is given. The Groq client
    is created once in the constructor and reused for every request.
    """

    def __init__(
        self,
        api_key: str | None = None,
        model: str = "openai/gpt-oss-20b",
        temperature: float | None = None,
    ) -> None:
        """Create the provider and its Groq client.

        Args:
            api_key: API key handed to ``groq.Groq``; may be None.
            model: Name of the chat model to request.
            temperature: Sampling temperature; omitted from requests when None.

        Raises:
            ProviderError: If the Groq client cannot be constructed.
        """
        self._model = model
        self._temperature = temperature
        self._last_usage: dict[str, int] | None = None
        try:
            self._client = groq.Groq(api_key=api_key)
        except groq.GroqError as e:
            raise ProviderError(f"Groq-Client konnte nicht erstellt werden: {e}") from e
        log.info("GroqLLMProvider initialisiert (Modell: %s, Temperature: %s)", self._model, self._temperature)

    @property
    def last_usage(self) -> dict[str, int] | None:
        """Token usage of the most recent LLM request, or None if unknown."""
        return self._last_usage

    def _build_messages(
        self, system_prompt: str, text: str
    ) -> list[dict[str, str]]:
        """Build the chat messages: system prompt plus the wrapped transcript."""
        return [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f"<transcript>{text}</transcript>"},
        ]

    def _completion_kwargs(self) -> dict:
        """Build the kwargs shared by every ``chat.completions.create`` call."""
        kwargs: dict = {}
        if self._temperature is not None:
            kwargs["temperature"] = self._temperature
        return kwargs

    def _create_completion(self, system_prompt: str, text: str, *, stream: bool, label: str):
        """Call ``chat.completions.create`` with retries for transient errors.

        Shared by the streaming and non-streaming paths so the retry policy
        lives in one place.

        Args:
            system_prompt: System prompt for the request.
            text: Transcript to format.
            stream: Whether to request a streaming response.
            label: Prefix used in log messages ("LLM" or "LLM-Stream").

        Returns:
            Whatever ``chat.completions.create`` returns.

        Raises:
            ProviderError: On an invalid API key, a non-retryable API error,
                or when all ``_MAX_RETRIES`` attempts failed.
        """
        failure: Exception | None = None
        for attempt in range(1, _MAX_RETRIES + 1):
            try:
                return self._client.chat.completions.create(
                    model=self._model,
                    messages=self._build_messages(system_prompt, text),
                    stream=stream,
                    **self._completion_kwargs(),
                )
            except groq.AuthenticationError as e:
                raise ProviderError(f"API-Key ungültig: {e}") from e
            except _RETRYABLE_ERRORS as e:
                failure = e
                if attempt < _MAX_RETRIES:
                    delay = _RETRY_BASE_DELAY * (2 ** (attempt - 1))
                    log.warning(
                        "%s-Versuch %d/%d fehlgeschlagen: %s - Retry in %.1fs",
                        label, attempt, _MAX_RETRIES, e, delay,
                    )
                    time.sleep(delay)
                else:
                    log.error("%s: Alle %d Versuche fehlgeschlagen", label, _MAX_RETRIES)
            except groq.APIError as e:
                raise ProviderError(f"GroqCloud API-Fehler: {e}") from e

        # All retries exhausted: translate the last error into a ProviderError
        if isinstance(failure, groq.RateLimitError):
            raise ProviderError(f"Rate Limit erreicht: {failure}") from failure
        if isinstance(failure, groq.APITimeoutError):
            raise ProviderError(f"GroqCloud Timeout: {failure}") from failure
        raise ProviderError(f"Keine Verbindung zu GroqCloud: {failure}") from failure

    def format_text(self, system_prompt: str, text: str) -> str:
        """Format ``text`` with the LLM in a single, non-streaming request.

        Args:
            system_prompt: System prompt describing the desired formatting.
            text: Transcript to format.

        Returns:
            The model's answer with surrounding whitespace stripped (empty
            string if the message has no content).

        Raises:
            ProviderError: If the request fails or the response has no choices.
        """
        log.info("LLM-Anfrage (non-streaming, Modell: %s, Temperature: %s)", self._model, self._temperature)
        self._last_usage = None

        # V01: retry handling for transient errors lives in _create_completion
        response = self._create_completion(system_prompt, text, stream=False, label="LLM")

        # Record usage data when the response carries it
        usage = getattr(response, "usage", None)
        if usage:
            self._last_usage = {
                "prompt_tokens": usage.prompt_tokens or 0,
                "completion_tokens": usage.completion_tokens or 0,
            }

        # An empty choices list would otherwise surface as a bare IndexError;
        # raising ProviderError keeps it inside the exception type callers handle.
        if not response.choices:
            raise ProviderError("LLM-Antwort enthält keine Choices")

        result = (response.choices[0].message.content or "").strip()
        log.info("LLM-Ergebnis: %d Zeichen", len(result))
        return result

    def format_text_stream(self, system_prompt: str, text: str) -> Iterator[str]:
        """Format ``text`` with the LLM and yield the answer chunk by chunk.

        This is a generator: the request (including retries) is only issued
        once iteration starts.

        Args:
            system_prompt: System prompt describing the desired formatting.
            text: Transcript to format.

        Yields:
            Non-empty content deltas in the order they arrive.

        Raises:
            ProviderError: If the stream cannot be established or breaks
                during iteration.
        """
        log.info("LLM-Anfrage (streaming, Modell: %s, Temperature: %s)", self._model, self._temperature)
        self._last_usage = None

        # V01: retry handling for transient errors while opening the stream
        stream = self._create_completion(system_prompt, text, stream=True, label="LLM-Stream")

        # V02: iterate inside try/except so a connection drop during streaming
        # is reported as a ProviderError
        total_chars = 0
        try:
            for chunk in stream:
                # Tolerate chunks without choices instead of failing on index 0
                choices = getattr(chunk, "choices", None)
                delta = choices[0].delta.content if choices else None
                if delta:
                    total_chars += len(delta)
                    yield delta
                # Groq streaming: usage is delivered via the x_groq field
                x_groq = getattr(chunk, "x_groq", None)
                usage = getattr(x_groq, "usage", None) if x_groq else None
                if usage:
                    self._last_usage = {
                        "prompt_tokens": getattr(usage, "prompt_tokens", 0) or 0,
                        "completion_tokens": getattr(usage, "completion_tokens", 0) or 0,
                    }
        except Exception as e:
            # groq.APIError is a subclass of Exception and was handled with an
            # identical message, so a single handler covers both cases.
            raise ProviderError(
                f"LLM-Stream abgebrochen nach {total_chars} Zeichen: {e}"
            ) from e
        log.info("LLM-Stream abgeschlossen: %d Zeichen", total_chars)
|
|
@@ -29,6 +29,7 @@ from paypertranscript.core.window_detector import WindowInfo, get_foreground_win
|
|
|
29
29
|
from paypertranscript.pipeline.transcription import (
|
|
30
30
|
STATUS_DONE,
|
|
31
31
|
STATUS_ERROR,
|
|
32
|
+
STATUS_LLM_FALLBACK,
|
|
32
33
|
STATUS_LLM_START,
|
|
33
34
|
STATUS_STT_START,
|
|
34
35
|
TranscriptionPipeline,
|
|
@@ -76,6 +77,7 @@ class AppSignals(QObject):
|
|
|
76
77
|
formatting_started = Signal()
|
|
77
78
|
processing_done = Signal()
|
|
78
79
|
processing_error = Signal(str)
|
|
80
|
+
done_message = Signal(str)
|
|
79
81
|
update_available = Signal(str)
|
|
80
82
|
update_not_available = Signal()
|
|
81
83
|
|
|
@@ -144,6 +146,8 @@ class PayPerTranscriptApp:
|
|
|
144
146
|
show_done_overlay=self._overlay.show_done,
|
|
145
147
|
on_update_check=self.trigger_update_check,
|
|
146
148
|
on_perform_update=self.perform_update_and_restart,
|
|
149
|
+
get_last_wav_path=self._get_last_wav_path,
|
|
150
|
+
on_retranscribe_precise=self._retranscribe_precise,
|
|
147
151
|
)
|
|
148
152
|
|
|
149
153
|
# Amplitude-Polling-Timer (30fps, laeuft nur waehrend Aufnahme)
|
|
@@ -154,6 +158,10 @@ class PayPerTranscriptApp:
|
|
|
154
158
|
self._connect_signals()
|
|
155
159
|
self._tray.show()
|
|
156
160
|
|
|
161
|
+
# V04: LLM-Initialisierungs-Warnung als Overlay anzeigen (nach Tray-Erstellung)
|
|
162
|
+
if self._llm_init_warning:
|
|
163
|
+
self._overlay.show_error("LLM deaktiviert")
|
|
164
|
+
|
|
157
165
|
# Periodischer Update-Check
|
|
158
166
|
self._update_timer = QTimer()
|
|
159
167
|
check_hours = self._config.get("updates.check_interval_hours", 24)
|
|
@@ -236,6 +244,7 @@ class PayPerTranscriptApp:
|
|
|
236
244
|
|
|
237
245
|
# LLM-Provider erstellen (optional)
|
|
238
246
|
self._llm_provider = None
|
|
247
|
+
self._llm_init_warning: str | None = None
|
|
239
248
|
try:
|
|
240
249
|
self._llm_provider = create_llm_provider(
|
|
241
250
|
config.get("api.provider", "groq"),
|
|
@@ -247,6 +256,10 @@ class PayPerTranscriptApp:
|
|
|
247
256
|
log.warning(
|
|
248
257
|
"LLM-Provider konnte nicht erstellt werden: %s - LLM-Formatierung deaktiviert", e
|
|
249
258
|
)
|
|
259
|
+
self._llm_init_warning = (
|
|
260
|
+
"LLM-Formatierung deaktiviert — Rohtext wird eingefügt.\n"
|
|
261
|
+
f"Grund: {e}"
|
|
262
|
+
)
|
|
250
263
|
|
|
251
264
|
# Session-Logger
|
|
252
265
|
self._session_logger = SessionLogger()
|
|
@@ -282,6 +295,58 @@ class PayPerTranscriptApp:
|
|
|
282
295
|
"""Gibt die letzte Transkription aus der Pipeline zurueck."""
|
|
283
296
|
return self._pipeline.last_transcription
|
|
284
297
|
|
|
298
|
+
def _get_last_wav_path(self):
    """Return the pipeline's last WAV path if it still exists, else None."""
    candidate = self._pipeline.last_wav_path
    return candidate if candidate and candidate.exists() else None
|
|
304
|
+
|
|
305
|
+
def _retranscribe_precise(self) -> None:
    """Transcribe the last recording again using ``whisper-large-v3``.

    If no recording is available, an error is signalled and nothing else
    happens. Otherwise a daemon thread creates a fresh STT provider,
    transcribes the file, copies the result to the clipboard, stores it as
    the pipeline's last transcription and signals completion. Failures are
    logged and reported through the ``processing_error`` signal.
    """
    import threading

    recording = self._get_last_wav_path()
    if not recording:
        self._signals.processing_error.emit("Keine Aufnahme")
        return

    def _run() -> None:
        try:
            # Reuses the recording-stopped signal; per the original comment
            # this puts the overlay into its "transcribing" state.
            self._signals.recording_stopped.emit()

            key = load_api_key() or os.environ.get("GROQ_API_KEY")
            provider_name = self._config.get("api.provider", "groq")
            precise_stt = create_stt_provider(
                provider_name,
                model="whisper-large-v3",
                api_key=key,
            )

            lang = self._config.get("general.language", "de")
            vocabulary = self._config.get("words.misspelled_words", [])
            from paypertranscript.pipeline.transcription import _build_word_list_prompt

            result = precise_stt.transcribe(
                recording,
                language=lang,
                prompt=_build_word_list_prompt(vocabulary),
            )
            if result:
                import pyperclip

                pyperclip.copy(result)
                self._pipeline.last_transcription = result

                self._signals.processing_done.emit()
                self._signals.done_message.emit("Kopiert")
            else:
                self._signals.processing_error.emit("Kein Text erkannt")
        except ProviderError as e:
            log.error("Re-Transkription fehlgeschlagen: %s", e)
            self._signals.processing_error.emit(f"Re-Transkription fehlgeschlagen: {e}")
        except Exception as e:
            log.error("Re-Transkription: Unerwarteter Fehler: %s", e, exc_info=True)
            self._signals.processing_error.emit("Re-Transkription fehlgeschlagen")

    threading.Thread(target=_run, daemon=True, name="retranscribe-precise").start()
|
|
349
|
+
|
|
285
350
|
def _connect_signals(self) -> None:
|
|
286
351
|
"""Verbindet AppSignals mit Tray- und Overlay-Slots."""
|
|
287
352
|
# Tray
|
|
@@ -300,6 +365,9 @@ class PayPerTranscriptApp:
|
|
|
300
365
|
self._signals.processing_done.connect(self._overlay.show_done)
|
|
301
366
|
self._signals.processing_error.connect(self._overlay.show_error)
|
|
302
367
|
|
|
368
|
+
# Done mit Text (z.B. "Kopiert")
|
|
369
|
+
self._signals.done_message.connect(self._overlay.show_done_message)
|
|
370
|
+
|
|
303
371
|
# Update
|
|
304
372
|
self._signals.update_available.connect(self._tray.on_update_available)
|
|
305
373
|
self._signals.update_not_available.connect(self._tray.on_update_not_available)
|
|
@@ -334,7 +402,7 @@ class PayPerTranscriptApp:
|
|
|
334
402
|
self._signals.recording_stopped.emit()
|
|
335
403
|
|
|
336
404
|
if audio is None:
|
|
337
|
-
self._signals.
|
|
405
|
+
self._signals.processing_error.emit("Keine Aufnahme")
|
|
338
406
|
return
|
|
339
407
|
|
|
340
408
|
actual_duration = len(audio) / 16000
|
|
@@ -345,7 +413,7 @@ class PayPerTranscriptApp:
|
|
|
345
413
|
MIN_RECORDING_DURATION,
|
|
346
414
|
)
|
|
347
415
|
self._current_window = None
|
|
348
|
-
self._signals.
|
|
416
|
+
self._signals.processing_error.emit("Aufnahme zu kurz")
|
|
349
417
|
return
|
|
350
418
|
|
|
351
419
|
# WAV speichern
|
|
@@ -365,10 +433,6 @@ class PayPerTranscriptApp:
|
|
|
365
433
|
"Aufnahme sehr lang (%.0fs) - wird trotzdem gesendet",
|
|
366
434
|
actual_duration,
|
|
367
435
|
)
|
|
368
|
-
self._tray.show_info(
|
|
369
|
-
f"Lange Aufnahme ({actual_duration / 60:.0f} Min). "
|
|
370
|
-
"Wird trotzdem gesendet."
|
|
371
|
-
)
|
|
372
436
|
|
|
373
437
|
# Pipeline in Hintergrund-Thread starten (non-blocking)
|
|
374
438
|
self._pipeline.process_async(
|
|
@@ -392,6 +456,8 @@ class PayPerTranscriptApp:
|
|
|
392
456
|
self._signals.formatting_started.emit()
|
|
393
457
|
elif status == STATUS_DONE:
|
|
394
458
|
self._signals.processing_done.emit()
|
|
459
|
+
elif status == STATUS_LLM_FALLBACK:
|
|
460
|
+
log.info("LLM-Fallback auf Rohtext: %s", detail)
|
|
395
461
|
elif status == STATUS_ERROR:
|
|
396
462
|
self._signals.processing_error.emit(detail or "Transkription fehlgeschlagen")
|
|
397
463
|
|
|
@@ -51,6 +51,7 @@ _FPS_INTERVAL = 16 # ~60fps
|
|
|
51
51
|
_FADE_IN_MS = 120
|
|
52
52
|
_FADE_OUT_MS = 200 # Smooth fade-out (nicht abrupt)
|
|
53
53
|
_DONE_SHOW_MS = 700 # Kurz sichtbar, dann smooth weg
|
|
54
|
+
_DONE_MSG_SHOW_MS = 1200 # Done mit Text: laenger sichtbar damit lesbar
|
|
54
55
|
_ERROR_SHOW_MS = 2500
|
|
55
56
|
|
|
56
57
|
# -- Visualizer --
|
|
@@ -76,6 +77,7 @@ class StatusOverlay(QWidget):
|
|
|
76
77
|
TRANSCRIBING = "transcribing"
|
|
77
78
|
FORMATTING = "formatting"
|
|
78
79
|
DONE = "done"
|
|
80
|
+
DONE_MESSAGE = "done_message"
|
|
79
81
|
ERROR = "error"
|
|
80
82
|
|
|
81
83
|
def __init__(self, config: ConfigManager, parent: QWidget | None = None) -> None:
|
|
@@ -83,6 +85,7 @@ class StatusOverlay(QWidget):
|
|
|
83
85
|
self._config = config
|
|
84
86
|
self._state: str | None = None
|
|
85
87
|
self._error_message = ""
|
|
88
|
+
self._done_message = ""
|
|
86
89
|
|
|
87
90
|
# Animation
|
|
88
91
|
self._tick = 0
|
|
@@ -134,6 +137,12 @@ class StatusOverlay(QWidget):
|
|
|
134
137
|
self._switch_state(self.DONE)
|
|
135
138
|
self._hide_timer.start(_DONE_SHOW_MS)
|
|
136
139
|
|
|
140
|
+
@Slot(str)
|
|
141
|
+
def show_done_message(self, message: str) -> None:
|
|
142
|
+
self._done_message = message
|
|
143
|
+
self._switch_state(self.DONE_MESSAGE)
|
|
144
|
+
self._hide_timer.start(_DONE_MSG_SHOW_MS)
|
|
145
|
+
|
|
137
146
|
@Slot(str)
|
|
138
147
|
def show_error(self, message: str) -> None:
|
|
139
148
|
self._error_message = message
|
|
@@ -263,13 +272,15 @@ class StatusOverlay(QWidget):
|
|
|
263
272
|
self._draw_processing(p)
|
|
264
273
|
elif self._state == self.DONE:
|
|
265
274
|
self._draw_done(p)
|
|
275
|
+
elif self._state == self.DONE_MESSAGE:
|
|
276
|
+
self._draw_done_message(p)
|
|
266
277
|
elif self._state == self.ERROR:
|
|
267
278
|
self._draw_error(p)
|
|
268
279
|
|
|
269
280
|
p.end()
|
|
270
281
|
|
|
271
282
|
def _accent_color(self) -> QColor:
|
|
272
|
-
if self._state
|
|
283
|
+
if self._state in (self.DONE, self.DONE_MESSAGE):
|
|
273
284
|
return _GREEN
|
|
274
285
|
if self._state == self.ERROR:
|
|
275
286
|
return _RED
|
|
@@ -365,6 +376,37 @@ class StatusOverlay(QWidget):
|
|
|
365
376
|
p.drawLine(int(cx - 6), int(cy), int(cx - 2), int(cy + 5))
|
|
366
377
|
p.drawLine(int(cx - 2), int(cy + 5), int(cx + 7), int(cy - 4))
|
|
367
378
|
|
|
379
|
+
def _draw_done_message(self, p: QPainter) -> None:
|
|
380
|
+
"""Gruener Checkmark + Text (z.B. 'Kopiert')."""
|
|
381
|
+
ix = 24
|
|
382
|
+
cy = _HEIGHT / 2
|
|
383
|
+
|
|
384
|
+
# Glow
|
|
385
|
+
glow = QRadialGradient(ix, cy, 12)
|
|
386
|
+
g = QColor(_GREEN)
|
|
387
|
+
g.setAlpha(30)
|
|
388
|
+
glow.setColorAt(0.0, g)
|
|
389
|
+
glow.setColorAt(1.0, QColor(0, 0, 0, 0))
|
|
390
|
+
p.setPen(Qt.PenStyle.NoPen)
|
|
391
|
+
p.setBrush(glow)
|
|
392
|
+
p.drawEllipse(QRectF(ix - 12, cy - 12, 24, 24))
|
|
393
|
+
|
|
394
|
+
# Checkmark
|
|
395
|
+
pen = QPen(_GREEN, 2.0, Qt.PenStyle.SolidLine,
|
|
396
|
+
Qt.PenCapStyle.RoundCap, Qt.PenJoinStyle.RoundJoin)
|
|
397
|
+
p.setPen(pen)
|
|
398
|
+
p.drawLine(int(ix - 6), int(cy), int(ix - 2), int(cy + 5))
|
|
399
|
+
p.drawLine(int(ix - 2), int(cy + 5), int(ix + 7), int(cy - 4))
|
|
400
|
+
|
|
401
|
+
# Text
|
|
402
|
+
font = QFont("Segoe UI", 8)
|
|
403
|
+
font.setWeight(QFont.Weight.Medium)
|
|
404
|
+
p.setFont(font)
|
|
405
|
+
p.setPen(QPen(_TEXT_PRIMARY))
|
|
406
|
+
msg = self._done_message or "OK"
|
|
407
|
+
p.drawText(QRectF(ix + 14, 0, _WIDTH - ix - 24, _HEIGHT),
|
|
408
|
+
Qt.AlignmentFlag.AlignVCenter | Qt.TextFlag.TextSingleLine, msg)
|
|
409
|
+
|
|
368
410
|
def _draw_error(self, p: QPainter) -> None:
|
|
369
411
|
"""Rotes X + Fehlermeldung."""
|
|
370
412
|
ix = 24
|
|
@@ -178,6 +178,8 @@ class SystemTray:
|
|
|
178
178
|
show_done_overlay: Callable[[], None] | None = None,
|
|
179
179
|
on_update_check: Callable[[], None] | None = None,
|
|
180
180
|
on_perform_update: Callable[[], None] | None = None,
|
|
181
|
+
get_last_wav_path: Callable | None = None,
|
|
182
|
+
on_retranscribe_precise: Callable[[], None] | None = None,
|
|
181
183
|
parent: QWidget | None = None,
|
|
182
184
|
) -> None:
|
|
183
185
|
self._config = config
|
|
@@ -187,6 +189,8 @@ class SystemTray:
|
|
|
187
189
|
self._show_done_overlay = show_done_overlay
|
|
188
190
|
self._on_update_check = on_update_check
|
|
189
191
|
self._on_perform_update = on_perform_update
|
|
192
|
+
self._get_last_wav_path = get_last_wav_path
|
|
193
|
+
self._on_retranscribe_precise = on_retranscribe_precise
|
|
190
194
|
self._icons = create_tray_icons()
|
|
191
195
|
|
|
192
196
|
# MainWindow (lazy creation)
|
|
@@ -207,15 +211,6 @@ class SystemTray:
|
|
|
207
211
|
self._tray.show()
|
|
208
212
|
log.info("System Tray angezeigt")
|
|
209
213
|
|
|
210
|
-
def show_info(self, message: str) -> None:
|
|
211
|
-
"""Zeigt eine Info-Benachrichtigung im System Tray."""
|
|
212
|
-
self._tray.showMessage(
|
|
213
|
-
"PayPerTranscript",
|
|
214
|
-
message,
|
|
215
|
-
QSystemTrayIcon.MessageIcon.Information,
|
|
216
|
-
3000,
|
|
217
|
-
)
|
|
218
|
-
|
|
219
214
|
def hide(self) -> None:
|
|
220
215
|
"""Versteckt das Tray-Icon."""
|
|
221
216
|
self._tray.hide()
|
|
@@ -247,6 +242,8 @@ class SystemTray:
|
|
|
247
242
|
self._tray.setToolTip(self._build_tooltip())
|
|
248
243
|
if self._get_last_transcription and self._get_last_transcription():
|
|
249
244
|
self._act_copy_last.setEnabled(True)
|
|
245
|
+
if self._get_last_wav_path and self._get_last_wav_path():
|
|
246
|
+
self._act_retranscribe.setEnabled(True)
|
|
250
247
|
|
|
251
248
|
@Slot(str)
|
|
252
249
|
def on_processing_error(self, message: str) -> None:
|
|
@@ -271,6 +268,10 @@ class SystemTray:
|
|
|
271
268
|
self._act_copy_last.triggered.connect(self._on_copy_last_transcription)
|
|
272
269
|
self._act_copy_last.setEnabled(False)
|
|
273
270
|
|
|
271
|
+
self._act_retranscribe = self._menu.addAction("Erneut transkribieren (Pr\u00e4zise)")
|
|
272
|
+
self._act_retranscribe.triggered.connect(self._on_retranscribe)
|
|
273
|
+
self._act_retranscribe.setEnabled(False)
|
|
274
|
+
|
|
274
275
|
self._menu.addSeparator()
|
|
275
276
|
|
|
276
277
|
act_quit = self._menu.addAction("Beenden")
|
|
@@ -321,6 +322,11 @@ class SystemTray:
|
|
|
321
322
|
else:
|
|
322
323
|
log.info("Keine Transkription zum Kopieren vorhanden")
|
|
323
324
|
|
|
325
|
+
def _on_retranscribe(self) -> None:
|
|
326
|
+
"""Startet Re-Transkription mit praezisem Modell."""
|
|
327
|
+
if self._on_retranscribe_precise:
|
|
328
|
+
self._on_retranscribe_precise()
|
|
329
|
+
|
|
324
330
|
# -- Update-Callbacks --
|
|
325
331
|
|
|
326
332
|
def _get_update_dialog(self) -> _UpdateInfoDialog:
|
|
@@ -1,193 +0,0 @@
|
|
|
1
|
-
"""GroqCloud API-Provider für PayPerTranscript.
|
|
2
|
-
|
|
3
|
-
Implementiert STT (Whisper) und LLM-Formatierung über die GroqCloud API.
|
|
4
|
-
"""
|
|
5
|
-
|
|
6
|
-
from collections.abc import Iterator
|
|
7
|
-
from pathlib import Path
|
|
8
|
-
|
|
9
|
-
import groq
|
|
10
|
-
|
|
11
|
-
from paypertranscript.core.logging import get_logger
|
|
12
|
-
from paypertranscript.providers.base import AbstractLLMProvider, AbstractSTTProvider, ProviderError
|
|
13
|
-
|
|
14
|
-
log = get_logger("providers.groq")
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
class GroqSTTProvider(AbstractSTTProvider):
|
|
18
|
-
"""GroqCloud Whisper STT-Provider.
|
|
19
|
-
|
|
20
|
-
Nutzt whisper-large-v3-turbo für Speech-to-Text.
|
|
21
|
-
Der Groq-Client wird einmal instanziiert und wiederverwendet
|
|
22
|
-
(Connection Pooling via httpx).
|
|
23
|
-
"""
|
|
24
|
-
|
|
25
|
-
def __init__(
|
|
26
|
-
self,
|
|
27
|
-
api_key: str | None = None,
|
|
28
|
-
model: str = "whisper-large-v3-turbo",
|
|
29
|
-
) -> None:
|
|
30
|
-
self._model = model
|
|
31
|
-
try:
|
|
32
|
-
self._client = groq.Groq(api_key=api_key)
|
|
33
|
-
except groq.GroqError as e:
|
|
34
|
-
raise ProviderError(f"Groq-Client konnte nicht erstellt werden: {e}") from e
|
|
35
|
-
log.info("GroqSTTProvider initialisiert (Modell: %s)", self._model)
|
|
36
|
-
|
|
37
|
-
def transcribe(self, audio_path: Path, language: str, prompt: str = "") -> str:
|
|
38
|
-
"""Transkribiert eine WAV-Datei via GroqCloud Whisper API."""
|
|
39
|
-
if not audio_path.exists():
|
|
40
|
-
raise ProviderError(f"Audio-Datei nicht gefunden: {audio_path}")
|
|
41
|
-
|
|
42
|
-
log.info(
|
|
43
|
-
"STT-Anfrage: %s (Sprache: %s, Modell: %s)",
|
|
44
|
-
audio_path.name,
|
|
45
|
-
language,
|
|
46
|
-
self._model,
|
|
47
|
-
)
|
|
48
|
-
if prompt:
|
|
49
|
-
log.info("STT-Prompt: %s", prompt)
|
|
50
|
-
|
|
51
|
-
try:
|
|
52
|
-
with open(audio_path, "rb") as audio_file:
|
|
53
|
-
transcription = self._client.audio.transcriptions.create(
|
|
54
|
-
model=self._model,
|
|
55
|
-
file=audio_file,
|
|
56
|
-
language=language,
|
|
57
|
-
prompt=prompt,
|
|
58
|
-
response_format="text",
|
|
59
|
-
temperature=0.0,
|
|
60
|
-
)
|
|
61
|
-
except groq.AuthenticationError as e:
|
|
62
|
-
raise ProviderError(f"API-Key ungültig: {e}") from e
|
|
63
|
-
except groq.RateLimitError as e:
|
|
64
|
-
raise ProviderError(f"Rate Limit erreicht: {e}") from e
|
|
65
|
-
except groq.APIConnectionError as e:
|
|
66
|
-
raise ProviderError(f"Keine Verbindung zu GroqCloud: {e}") from e
|
|
67
|
-
except groq.APITimeoutError as e:
|
|
68
|
-
raise ProviderError(f"GroqCloud Timeout: {e}") from e
|
|
69
|
-
except groq.APIError as e:
|
|
70
|
-
raise ProviderError(f"GroqCloud API-Fehler: {e}") from e
|
|
71
|
-
|
|
72
|
-
# response_format="text" gibt direkt einen String zurück
|
|
73
|
-
text = transcription.strip() if isinstance(transcription, str) else transcription.text.strip()
|
|
74
|
-
|
|
75
|
-
log.info("STT-Ergebnis: %d Zeichen", len(text))
|
|
76
|
-
return text
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
class GroqLLMProvider(AbstractLLMProvider):
|
|
80
|
-
"""GroqCloud LLM-Provider für Textformatierung.
|
|
81
|
-
|
|
82
|
-
Nutzt openai/gpt-oss-20b für kontextabhängige Formatierung.
|
|
83
|
-
Der Groq-Client wird einmal instanziiert und wiederverwendet.
|
|
84
|
-
"""
|
|
85
|
-
|
|
86
|
-
def __init__(
|
|
87
|
-
self,
|
|
88
|
-
api_key: str | None = None,
|
|
89
|
-
model: str = "openai/gpt-oss-20b",
|
|
90
|
-
temperature: float | None = None,
|
|
91
|
-
) -> None:
|
|
92
|
-
self._model = model
|
|
93
|
-
self._temperature = temperature
|
|
94
|
-
self._last_usage: dict[str, int] | None = None
|
|
95
|
-
try:
|
|
96
|
-
self._client = groq.Groq(api_key=api_key)
|
|
97
|
-
except groq.GroqError as e:
|
|
98
|
-
raise ProviderError(f"Groq-Client konnte nicht erstellt werden: {e}") from e
|
|
99
|
-
log.info("GroqLLMProvider initialisiert (Modell: %s, Temperature: %s)", self._model, self._temperature)
|
|
100
|
-
|
|
101
|
-
@property
|
|
102
|
-
def last_usage(self) -> dict[str, int] | None:
|
|
103
|
-
"""Token-Usage der letzten LLM-Anfrage."""
|
|
104
|
-
return self._last_usage
|
|
105
|
-
|
|
106
|
-
def _build_messages(
|
|
107
|
-
self, system_prompt: str, text: str
|
|
108
|
-
) -> list[dict[str, str]]:
|
|
109
|
-
return [
|
|
110
|
-
{"role": "system", "content": system_prompt},
|
|
111
|
-
{"role": "user", "content": f"<transcript>{text}</transcript>"},
|
|
112
|
-
]
|
|
113
|
-
|
|
114
|
-
def _completion_kwargs(self) -> dict:
|
|
115
|
-
"""Baut gemeinsame kwargs für chat.completions.create."""
|
|
116
|
-
kwargs: dict = {}
|
|
117
|
-
if self._temperature is not None:
|
|
118
|
-
kwargs["temperature"] = self._temperature
|
|
119
|
-
return kwargs
|
|
120
|
-
|
|
121
|
-
def format_text(self, system_prompt: str, text: str) -> str:
|
|
122
|
-
log.info("LLM-Anfrage (non-streaming, Modell: %s, Temperature: %s)", self._model, self._temperature)
|
|
123
|
-
self._last_usage = None
|
|
124
|
-
try:
|
|
125
|
-
response = self._client.chat.completions.create(
|
|
126
|
-
model=self._model,
|
|
127
|
-
messages=self._build_messages(system_prompt, text),
|
|
128
|
-
stream=False,
|
|
129
|
-
**self._completion_kwargs(),
|
|
130
|
-
)
|
|
131
|
-
except groq.AuthenticationError as e:
|
|
132
|
-
raise ProviderError(f"API-Key ungültig: {e}") from e
|
|
133
|
-
except groq.RateLimitError as e:
|
|
134
|
-
raise ProviderError(f"Rate Limit erreicht: {e}") from e
|
|
135
|
-
except groq.APIConnectionError as e:
|
|
136
|
-
raise ProviderError(f"Keine Verbindung zu GroqCloud: {e}") from e
|
|
137
|
-
except groq.APITimeoutError as e:
|
|
138
|
-
raise ProviderError(f"GroqCloud Timeout: {e}") from e
|
|
139
|
-
except groq.APIError as e:
|
|
140
|
-
raise ProviderError(f"GroqCloud API-Fehler: {e}") from e
|
|
141
|
-
|
|
142
|
-
# Usage-Daten erfassen
|
|
143
|
-
if hasattr(response, "usage") and response.usage:
|
|
144
|
-
self._last_usage = {
|
|
145
|
-
"prompt_tokens": response.usage.prompt_tokens or 0,
|
|
146
|
-
"completion_tokens": response.usage.completion_tokens or 0,
|
|
147
|
-
}
|
|
148
|
-
|
|
149
|
-
result = response.choices[0].message.content or ""
|
|
150
|
-
result = result.strip()
|
|
151
|
-
log.info("LLM-Ergebnis: %d Zeichen", len(result))
|
|
152
|
-
return result
|
|
153
|
-
|
|
154
|
-
def format_text_stream(self, system_prompt: str, text: str) -> Iterator[str]:
|
|
155
|
-
log.info("LLM-Anfrage (streaming, Modell: %s, Temperature: %s)", self._model, self._temperature)
|
|
156
|
-
self._last_usage = None
|
|
157
|
-
try:
|
|
158
|
-
stream = self._client.chat.completions.create(
|
|
159
|
-
model=self._model,
|
|
160
|
-
messages=self._build_messages(system_prompt, text),
|
|
161
|
-
stream=True,
|
|
162
|
-
**self._completion_kwargs(),
|
|
163
|
-
)
|
|
164
|
-
except groq.AuthenticationError as e:
|
|
165
|
-
raise ProviderError(f"API-Key ungültig: {e}") from e
|
|
166
|
-
except groq.RateLimitError as e:
|
|
167
|
-
raise ProviderError(f"Rate Limit erreicht: {e}") from e
|
|
168
|
-
except groq.APIConnectionError as e:
|
|
169
|
-
raise ProviderError(f"Keine Verbindung zu GroqCloud: {e}") from e
|
|
170
|
-
except groq.APITimeoutError as e:
|
|
171
|
-
raise ProviderError(f"GroqCloud Timeout: {e}") from e
|
|
172
|
-
except groq.APIError as e:
|
|
173
|
-
raise ProviderError(f"GroqCloud API-Fehler: {e}") from e
|
|
174
|
-
|
|
175
|
-
total_chars = 0
|
|
176
|
-
for chunk in stream:
|
|
177
|
-
delta = chunk.choices[0].delta.content
|
|
178
|
-
if delta:
|
|
179
|
-
total_chars += len(delta)
|
|
180
|
-
yield delta
|
|
181
|
-
# Groq streaming: Usage im letzten Chunk via x_groq
|
|
182
|
-
if (
|
|
183
|
-
hasattr(chunk, "x_groq")
|
|
184
|
-
and chunk.x_groq
|
|
185
|
-
and hasattr(chunk.x_groq, "usage")
|
|
186
|
-
and chunk.x_groq.usage
|
|
187
|
-
):
|
|
188
|
-
usage = chunk.x_groq.usage
|
|
189
|
-
self._last_usage = {
|
|
190
|
-
"prompt_tokens": getattr(usage, "prompt_tokens", 0) or 0,
|
|
191
|
-
"completion_tokens": getattr(usage, "completion_tokens", 0) or 0,
|
|
192
|
-
}
|
|
193
|
-
log.info("LLM-Stream abgeschlossen: %d Zeichen", total_chars)
|
|
File without changes
|
|
File without changes
|
{paypertranscript-0.2.9 → paypertranscript-0.3.0}/PayPerTranscript.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{paypertranscript-0.2.9 → paypertranscript-0.3.0}/PayPerTranscript.egg-info/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/assets/icons/arrow_down.svg
RENAMED
|
File without changes
|
|
File without changes
|
{paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/assets/icons/tray_green.png
RENAMED
|
File without changes
|
{paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/assets/icons/tray_orange.png
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/pages/settings_page.py
RENAMED
|
File without changes
|
{paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/pages/statistics_page.py
RENAMED
|
File without changes
|
{paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/pages/window_mapping_page.py
RENAMED
|
File without changes
|
{paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/pages/word_list_page.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|