PayPerTranscript 0.2.9__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/PKG-INFO +1 -1
  2. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/PayPerTranscript.egg-info/PKG-INFO +1 -1
  3. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/__init__.py +1 -1
  4. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/core/hotkey.py +21 -1
  5. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/core/text_inserter.py +23 -8
  6. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/pipeline/transcription.py +45 -3
  7. paypertranscript-0.3.0/paypertranscript/providers/groq_provider.py +273 -0
  8. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/app.py +72 -6
  9. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/overlay.py +43 -1
  10. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/tray.py +15 -9
  11. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/pyproject.toml +1 -1
  12. paypertranscript-0.2.9/paypertranscript/providers/groq_provider.py +0 -193
  13. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/LICENSE +0 -0
  14. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/PayPerTranscript.egg-info/SOURCES.txt +0 -0
  15. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/PayPerTranscript.egg-info/dependency_links.txt +0 -0
  16. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/PayPerTranscript.egg-info/entry_points.txt +0 -0
  17. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/PayPerTranscript.egg-info/requires.txt +0 -0
  18. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/PayPerTranscript.egg-info/top_level.txt +0 -0
  19. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/README.md +0 -0
  20. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/__main__.py +0 -0
  21. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/assets/icons/app.ico +0 -0
  22. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/assets/icons/app.png +0 -0
  23. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/assets/icons/app_big.png +0 -0
  24. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/assets/icons/arrow_down.svg +0 -0
  25. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/assets/icons/tray.png +0 -0
  26. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/assets/icons/tray_green.png +0 -0
  27. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/assets/icons/tray_orange.png +0 -0
  28. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/assets/sounds/start.wav +0 -0
  29. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/assets/sounds/stop.wav +0 -0
  30. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/assets/styles/dark.qss +0 -0
  31. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/core/__init__.py +0 -0
  32. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/core/audio_manager.py +0 -0
  33. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/core/config.py +0 -0
  34. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/core/cost_tracker.py +0 -0
  35. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/core/logging.py +0 -0
  36. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/core/paths.py +0 -0
  37. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/core/recorder.py +0 -0
  38. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/core/session_logger.py +0 -0
  39. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/core/updater.py +0 -0
  40. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/core/window_detector.py +0 -0
  41. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/pipeline/__init__.py +0 -0
  42. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/providers/__init__.py +0 -0
  43. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/providers/base.py +0 -0
  44. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/__init__.py +0 -0
  45. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/animated.py +0 -0
  46. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/constants.py +0 -0
  47. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/main_window.py +0 -0
  48. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/pages/__init__.py +0 -0
  49. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/pages/home_page.py +0 -0
  50. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/pages/settings_page.py +0 -0
  51. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/pages/statistics_page.py +0 -0
  52. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/pages/window_mapping_page.py +0 -0
  53. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/pages/word_list_page.py +0 -0
  54. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/setup_wizard.py +0 -0
  55. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/sidebar.py +0 -0
  56. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/paypertranscript/ui/widgets.py +0 -0
  57. {paypertranscript-0.2.9 → paypertranscript-0.3.0}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: PayPerTranscript
3
- Version: 0.2.9
3
+ Version: 0.3.0
4
4
  Summary: Open-Source Voice-to-Text mit Pay-per-Use Pricing
5
5
  Author: PayPerTranscript Contributors
6
6
  License-Expression: MIT
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: PayPerTranscript
3
- Version: 0.2.9
3
+ Version: 0.3.0
4
4
  Summary: Open-Source Voice-to-Text mit Pay-per-Use Pricing
5
5
  Author: PayPerTranscript Contributors
6
6
  License-Expression: MIT
@@ -1,3 +1,3 @@
1
1
  """PayPerTranscript - Voice-to-Text mit Pay-per-Use Pricing."""
2
2
 
3
- __version__ = "0.2.9"
3
+ __version__ = "0.3.0"
@@ -54,6 +54,11 @@ _MODIFIER_GROUPS: dict[str, set[keyboard.Key]] = {
54
54
  "cmd": {keyboard.Key.cmd, keyboard.Key.cmd_l, keyboard.Key.cmd_r},
55
55
  }
56
56
 
57
# Flat set of every modifier key across all groups; used by the exact-match
# check to detect modifiers that are pressed but not part of the hotkey.
# Built with set.union so no loop variable is left behind in the module
# namespace.
_ALL_MODIFIER_KEYS: set[keyboard.Key] = set().union(*_MODIFIER_GROUPS.values())
61
+
57
62
  # Alt-Keys fuer Menu-Bar-Workaround (Windows aktiviert Menueleiste bei bare Alt-Release)
58
63
  _ALT_KEYS: set[keyboard.Key] = {keyboard.Key.alt_l, keyboard.Key.alt_r}
59
64
 
@@ -149,18 +154,33 @@ class HotkeyListener:
149
154
  target_keys: list[keyboard.Key | keyboard.KeyCode],
150
155
  modifier_groups: list[set[keyboard.Key]],
151
156
  ) -> bool:
152
- """Prüft ob eine Tastenkombination aktuell gedrückt ist."""
157
+ """Prüft ob eine Tastenkombination aktuell gedrückt ist.
158
+
159
+ Exaktes Modifier-Matching: es muessen genau die konfigurierten Modifier
160
+ gedrueckt sein, keine zusaetzlichen. Damit wird verhindert, dass z.B.
161
+ Ctrl+Win auch durch Ctrl+Shift+Alt+F9 ausgeloest wird.
162
+ """
153
163
  if not target_keys:
154
164
  return False
155
165
 
166
+ # Sammle welche Modifier-Gruppen zum Hotkey gehoeren
167
+ required_modifier_keys: set[keyboard.Key] = set()
168
+
156
169
  for i, target_key in enumerate(target_keys):
157
170
  # Für Modifier: prüfe ob *irgendein* Key aus der Gruppe gedrückt ist
158
171
  if i < len(modifier_groups) and modifier_groups[i]:
159
172
  if not (modifier_groups[i] & self._pressed_keys):
160
173
  return False
174
+ required_modifier_keys |= modifier_groups[i]
161
175
  else:
162
176
  if target_key not in self._pressed_keys:
163
177
  return False
178
+
179
+ # Pruefe ob Extra-Modifier gedrueckt sind, die nicht zum Hotkey gehoeren
180
+ extra_modifiers = (self._pressed_keys & _ALL_MODIFIER_KEYS) - required_modifier_keys
181
+ if extra_modifiers:
182
+ return False
183
+
164
184
  return True
165
185
 
166
186
  def _combo_uses_alt(self, target_keys: list[keyboard.Key | keyboard.KeyCode]) -> bool:
@@ -19,6 +19,27 @@ log = get_logger("core.text_inserter")
19
19
  pyautogui.FAILSAFE = False
20
20
  pyautogui.PAUSE = 0
21
21
 
22
# Clipboard restore: retry configuration
_CLIPBOARD_RESTORE_RETRIES = 3
_CLIPBOARD_RESTORE_DELAY = 0.05  # 50 ms between attempts


def _restore_clipboard(content: str) -> None:
    """Restore the clipboard to ``content``, retrying on failure.

    Other applications (clipboard managers, password managers) can briefly
    lock the clipboard, so the copy is attempted up to
    ``_CLIPBOARD_RESTORE_RETRIES`` times with a short pause in between.

    This is best effort: a failure is never propagated to the caller. If every
    attempt fails, a warning that includes the last error is logged.

    Args:
        content: Text to put back on the clipboard.
    """
    last_error: Exception | None = None
    for attempt in range(1, _CLIPBOARD_RESTORE_RETRIES + 1):
        try:
            pyperclip.copy(content)
            return
        except Exception as exc:  # deliberate: restoring must never raise
            last_error = exc
            # Pause only between attempts, not after the final one.
            if attempt < _CLIPBOARD_RESTORE_RETRIES:
                time.sleep(_CLIPBOARD_RESTORE_DELAY)

    # Include the reason so a persistently locked clipboard can be diagnosed.
    log.warning(
        "Zwischenablage konnte nicht wiederhergestellt werden (nach %d Versuchen): %s",
        _CLIPBOARD_RESTORE_RETRIES,
        last_error,
    )
42
+
22
43
 
23
44
  def insert_text(text: str) -> None:
24
45
  """Fügt Text an der aktuellen Cursor-Position ein.
@@ -62,10 +83,7 @@ def insert_text(text: str) -> None:
62
83
 
63
84
  finally:
64
85
  # 5. Alte Zwischenablage wiederherstellen
65
- try:
66
- pyperclip.copy(old_clipboard)
67
- except Exception:
68
- log.debug("Zwischenablage konnte nicht wiederhergestellt werden")
86
+ _restore_clipboard(old_clipboard)
69
87
 
70
88
 
71
89
  # Intervall (Sekunden) zwischen Chunk-Pastes bei Streaming-Typing
@@ -125,7 +143,4 @@ def insert_text_streaming(chunks: Iterator[str]) -> None:
125
143
  log.error("Auch Fallback-Paste fehlgeschlagen")
126
144
 
127
145
  finally:
128
- try:
129
- pyperclip.copy(old_clipboard)
130
- except Exception:
131
- log.debug("Zwischenablage konnte nicht wiederhergestellt werden")
146
+ _restore_clipboard(old_clipboard)
@@ -26,9 +26,38 @@ STATUS_STT_DONE = "stt_done"
26
26
  STATUS_LLM_START = "llm_start"
27
27
  STATUS_DONE = "done"
28
28
  STATUS_ERROR = "error"
29
+ STATUS_LLM_FALLBACK = "llm_fallback"
29
30
 
30
31
  log = get_logger("pipeline.transcription")
31
32
 
33
# Hallucination filter: Whisper hallucinates on short recordings without speech
_HALLUCINATION_PATTERNS = [
    "copyright",
    "untertitel",
    "subtitles by",
    "thanks for watching",
    "thank you for watching",
    "sous-titres",
    "amara.org",
]
_HALLUCINATION_MAX_DURATION = 5.0


def _is_hallucination(text: str, audio_duration: float) -> bool:
    """Return whether an STT result looks like a Whisper hallucination.

    On short recordings (shorter than ``_HALLUCINATION_MAX_DURATION`` seconds)
    without speech, Whisper tends to emit stereotypical strings such as
    "Copyright Australian Broadcasting Corporation". The check is a
    case-insensitive substring match against ``_HALLUCINATION_PATTERNS``.

    Args:
        text: The STT result.
        audio_duration: Audio duration in seconds.

    Returns:
        True if the recording is short and the text contains a known pattern.
    """
    # Recordings at or above the threshold are never filtered.
    if audio_duration >= _HALLUCINATION_MAX_DURATION:
        return False

    haystack = text.lower()
    for marker in _HALLUCINATION_PATTERNS:
        if marker in haystack:
            return True
    return False
59
+
60
+
32
61
  # Maximale Prompt-Laenge fuer Whisper (224 Tokens).
33
62
  # Konservative Schaetzung: ~4 Zeichen pro Token fuer gemischten DE/EN Text.
34
63
  _MAX_PROMPT_CHARS = 896
@@ -86,6 +115,7 @@ class TranscriptionPipeline:
86
115
  self._config = config
87
116
  self._session_logger = session_logger
88
117
  self.last_transcription: str | None = None
118
+ self.last_wav_path: Path | None = None
89
119
  log.info(
90
120
  "TranscriptionPipeline initialisiert (LLM: %s, Tracking: %s)",
91
121
  "aktiv" if llm_provider else "deaktiviert",
@@ -219,6 +249,8 @@ class TranscriptionPipeline:
219
249
  except Exception:
220
250
  pass
221
251
 
252
+ self.last_wav_path = wav_path
253
+
222
254
  try:
223
255
  # Audio-Dauer: entweder uebergeben oder aus WAV-Datei berechnen
224
256
  if audio_duration is None:
@@ -241,7 +273,16 @@ class TranscriptionPipeline:
241
273
 
242
274
  if not text:
243
275
  log.info("Pipeline: STT lieferte leeren Text - uebersprungen")
244
- _notify(STATUS_DONE)
276
+ _notify(STATUS_ERROR, "Kein Text erkannt")
277
+ return
278
+
279
+ if _is_hallucination(text, audio_duration):
280
+ log.info(
281
+ "Pipeline: Halluzination erkannt (%.1fs, '%s') - uebersprungen",
282
+ audio_duration,
283
+ text[:80],
284
+ )
285
+ _notify(STATUS_ERROR, "Keine Sprache erkannt")
245
286
  return
246
287
 
247
288
  # LLM-Formatierung (falls Window-Mapping existiert)
@@ -262,7 +303,7 @@ class TranscriptionPipeline:
262
303
  except Exception as e:
263
304
  insert_ok = False
264
305
  log.error("Pipeline: Text-Einfuegung fehlgeschlagen: %s", e)
265
- _notify(STATUS_ERROR, "Text konnte nicht eingefuegt werden")
306
+ _notify(STATUS_ERROR, f"Text-Einfuegung fehlgeschlagen: {e}")
266
307
 
267
308
  def _do_insert_stream(chunks_iter: object) -> None:
268
309
  nonlocal insert_ok
@@ -271,7 +312,7 @@ class TranscriptionPipeline:
271
312
  except Exception as e:
272
313
  insert_ok = False
273
314
  log.error("Pipeline: Streaming-Einfuegung fehlgeschlagen: %s", e)
274
- _notify(STATUS_ERROR, "Text konnte nicht eingefuegt werden")
315
+ _notify(STATUS_ERROR, f"Text-Einfuegung fehlgeschlagen: {e}")
275
316
 
276
317
  if system_prompt and self._llm:
277
318
  _notify(STATUS_LLM_START)
@@ -299,6 +340,7 @@ class TranscriptionPipeline:
299
340
 
300
341
  except ProviderError as e:
301
342
  log.warning("Pipeline: LLM-Fehler - Fallback auf Rohtext: %s", e)
343
+ _notify(STATUS_LLM_FALLBACK, str(e))
302
344
  _do_insert(text)
303
345
  else:
304
346
  # Kein Mapping oder kein LLM-Provider -> Rohtext direkt einfuegen
@@ -0,0 +1,273 @@
1
+ """GroqCloud API-Provider für PayPerTranscript.
2
+
3
+ Implementiert STT (Whisper) und LLM-Formatierung über die GroqCloud API.
4
+ """
5
+
6
+ import time
7
+ from collections.abc import Iterator
8
+ from pathlib import Path
9
+
10
+ import groq
11
+
12
+ from paypertranscript.core.logging import get_logger
13
+ from paypertranscript.providers.base import AbstractLLMProvider, AbstractSTTProvider, ProviderError
14
+
15
+ log = get_logger("providers.groq")
16
+
17
# Retry configuration for transient API errors
_MAX_RETRIES = 3
# Base delay in seconds; the delay doubles after each failed attempt. With
# three attempts only two pauses ever happen (1s, then 2s) - there is no sleep
# after the final attempt.
_RETRY_BASE_DELAY = 1.0
_RETRYABLE_ERRORS = (groq.RateLimitError, groq.APITimeoutError, groq.APIConnectionError)

# Minimum WAV file size (44 bytes = WAV header without any audio data)
_MIN_WAV_SIZE = 44
24
+
25
+
26
class GroqSTTProvider(AbstractSTTProvider):
    """GroqCloud Whisper speech-to-text provider.

    Uses ``whisper-large-v3-turbo`` by default. The Groq client is created
    once in ``__init__`` and reused for every request (connection pooling via
    httpx, per the original author's note).
    """

    def __init__(
        self,
        api_key: str | None = None,
        model: str = "whisper-large-v3-turbo",
    ) -> None:
        """Create the provider.

        Args:
            api_key: GroqCloud API key, passed straight to ``groq.Groq``.
            model: Whisper model name to request.

        Raises:
            ProviderError: If the Groq client cannot be constructed.
        """
        self._model = model
        try:
            self._client = groq.Groq(api_key=api_key)
        except groq.GroqError as e:
            raise ProviderError(f"Groq-Client konnte nicht erstellt werden: {e}") from e
        log.info("GroqSTTProvider initialisiert (Modell: %s)", self._model)

    def transcribe(self, audio_path: Path, language: str, prompt: str = "") -> str:
        """Transcribe a WAV file via the GroqCloud Whisper API.

        Transient failures (rate limit, timeout, connection error) are retried
        up to ``_MAX_RETRIES`` times with exponential backoff.

        Args:
            audio_path: Path to the WAV file.
            language: Language code forwarded to the API.
            prompt: Optional prompt forwarded to the API.

        Returns:
            The transcribed text with surrounding whitespace stripped.

        Raises:
            ProviderError: If the file is missing, unreadable, or not larger
                than a bare WAV header; if authentication fails; if a
                non-retryable API error occurs; or if all retries are
                exhausted.
        """
        # A single stat() call both checks existence and yields the size, so
        # the file cannot vanish between an exists() check and the size lookup.
        try:
            file_size = audio_path.stat().st_size
        except (FileNotFoundError, NotADirectoryError) as e:
            raise ProviderError(f"Audio-Datei nicht gefunden: {audio_path}") from e

        # V05: a file no larger than the WAV header carries no audio data.
        if file_size <= _MIN_WAV_SIZE:
            raise ProviderError(
                f"Audio-Datei ist leer oder beschädigt ({file_size} Bytes)"
            )

        log.info(
            "STT-Anfrage: %s (Sprache: %s, Modell: %s)",
            audio_path.name,
            language,
            self._model,
        )
        if prompt:
            log.info("STT-Prompt: %s", prompt)

        # V01: retry loop for transient errors.
        last_error: Exception | None = None
        for attempt in range(1, _MAX_RETRIES + 1):
            try:
                # Re-open on every attempt so each upload starts at byte 0.
                with open(audio_path, "rb") as audio_file:
                    transcription = self._client.audio.transcriptions.create(
                        model=self._model,
                        file=audio_file,
                        language=language,
                        prompt=prompt,
                        response_format="text",
                        temperature=0.0,
                    )
                break  # success
            except groq.AuthenticationError as e:
                raise ProviderError(f"API-Key ungültig: {e}") from e
            except _RETRYABLE_ERRORS as e:
                last_error = e
                if attempt < _MAX_RETRIES:
                    delay = _RETRY_BASE_DELAY * (2 ** (attempt - 1))
                    log.warning(
                        "STT-Versuch %d/%d fehlgeschlagen: %s - Retry in %.1fs",
                        attempt, _MAX_RETRIES, e, delay,
                    )
                    time.sleep(delay)
                else:
                    log.error("STT: Alle %d Versuche fehlgeschlagen", _MAX_RETRIES)
            except groq.APIError as e:
                raise ProviderError(f"GroqCloud API-Fehler: {e}") from e
            except OSError as e:
                # The file became unreadable after the size check; report it
                # as a provider failure instead of leaking a raw OSError.
                raise ProviderError(f"Audio-Datei konnte nicht gelesen werden: {e}") from e
        else:
            # Loop finished without break: every attempt hit a retryable error.
            if isinstance(last_error, groq.RateLimitError):
                raise ProviderError(f"Rate Limit erreicht: {last_error}") from last_error
            if isinstance(last_error, groq.APITimeoutError):
                raise ProviderError(f"GroqCloud Timeout: {last_error}") from last_error
            raise ProviderError(f"Keine Verbindung zu GroqCloud: {last_error}") from last_error

        # response_format="text" yields a plain string; fall back to ``.text``
        # for object-style responses.
        if isinstance(transcription, str):
            text = transcription.strip()
        else:
            text = transcription.text.strip()

        log.info("STT-Ergebnis: %d Zeichen", len(text))
        return text
111
+
112
+
113
class GroqLLMProvider(AbstractLLMProvider):
    """GroqCloud LLM provider used for text formatting.

    Uses ``openai/gpt-oss-20b`` by default for context-dependent formatting.
    The Groq client is created once in ``__init__`` and reused.
    """

    def __init__(
        self,
        api_key: str | None = None,
        model: str = "openai/gpt-oss-20b",
        temperature: float | None = None,
    ) -> None:
        """Create the provider.

        Args:
            api_key: GroqCloud API key, passed straight to ``groq.Groq``.
            model: Chat model name to request.
            temperature: Sampling temperature; when ``None`` the parameter is
                omitted from the request.

        Raises:
            ProviderError: If the Groq client cannot be constructed.
        """
        self._model = model
        self._temperature = temperature
        self._last_usage: dict[str, int] | None = None
        try:
            self._client = groq.Groq(api_key=api_key)
        except groq.GroqError as e:
            raise ProviderError(f"Groq-Client konnte nicht erstellt werden: {e}") from e
        log.info("GroqLLMProvider initialisiert (Modell: %s, Temperature: %s)", self._model, self._temperature)

    @property
    def last_usage(self) -> dict[str, int] | None:
        """Token usage of the most recent LLM request, or ``None`` if unknown."""
        return self._last_usage

    def _build_messages(
        self, system_prompt: str, text: str
    ) -> list[dict[str, str]]:
        """Build the chat messages: system prompt plus the wrapped transcript."""
        return [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f"<transcript>{text}</transcript>"},
        ]

    def _completion_kwargs(self) -> dict:
        """Build the optional kwargs shared by all ``chat.completions.create`` calls."""
        kwargs: dict = {}
        if self._temperature is not None:
            kwargs["temperature"] = self._temperature
        return kwargs

    def _create_with_retry(self, system_prompt: str, text: str, *, stream: bool, label: str):
        """Call ``chat.completions.create`` with retries for transient errors.

        Single home for the retry/backoff logic (V01) shared by the streaming
        and non-streaming entry points.

        Args:
            system_prompt: System prompt for the request.
            text: Transcript to format.
            stream: Whether to request a streaming response.
            label: Prefix used in log messages ("LLM" or "LLM-Stream").

        Returns:
            Whatever ``chat.completions.create`` returns (a response object,
            or a stream when ``stream`` is true).

        Raises:
            ProviderError: On authentication failure, a non-retryable API
                error, or when all ``_MAX_RETRIES`` attempts failed.
        """
        last_error: Exception | None = None
        for attempt in range(1, _MAX_RETRIES + 1):
            try:
                return self._client.chat.completions.create(
                    model=self._model,
                    messages=self._build_messages(system_prompt, text),
                    stream=stream,
                    **self._completion_kwargs(),
                )
            except groq.AuthenticationError as e:
                raise ProviderError(f"API-Key ungültig: {e}") from e
            except _RETRYABLE_ERRORS as e:
                last_error = e
                if attempt < _MAX_RETRIES:
                    delay = _RETRY_BASE_DELAY * (2 ** (attempt - 1))
                    log.warning(
                        "%s-Versuch %d/%d fehlgeschlagen: %s - Retry in %.1fs",
                        label, attempt, _MAX_RETRIES, e, delay,
                    )
                    time.sleep(delay)
                else:
                    log.error("%s: Alle %d Versuche fehlgeschlagen", label, _MAX_RETRIES)
            except groq.APIError as e:
                raise ProviderError(f"GroqCloud API-Fehler: {e}") from e

        # Every attempt ended in a retryable error: report the last one.
        if isinstance(last_error, groq.RateLimitError):
            raise ProviderError(f"Rate Limit erreicht: {last_error}") from last_error
        if isinstance(last_error, groq.APITimeoutError):
            raise ProviderError(f"GroqCloud Timeout: {last_error}") from last_error
        raise ProviderError(f"Keine Verbindung zu GroqCloud: {last_error}") from last_error

    def format_text(self, system_prompt: str, text: str) -> str:
        """Format ``text`` with the LLM and return the complete result.

        Resets ``last_usage`` and fills it from the response when usage data
        is present.

        Raises:
            ProviderError: See ``_create_with_retry``.
        """
        log.info("LLM-Anfrage (non-streaming, Modell: %s, Temperature: %s)", self._model, self._temperature)
        self._last_usage = None

        response = self._create_with_retry(system_prompt, text, stream=False, label="LLM")

        # Record usage data when the response carries it.
        usage = getattr(response, "usage", None)
        if usage:
            self._last_usage = {
                "prompt_tokens": usage.prompt_tokens or 0,
                "completion_tokens": usage.completion_tokens or 0,
            }

        result = (response.choices[0].message.content or "").strip()
        log.info("LLM-Ergebnis: %d Zeichen", len(result))
        return result

    def format_text_stream(self, system_prompt: str, text: str) -> Iterator[str]:
        """Format ``text`` with the LLM, yielding content deltas as they arrive.

        This is a generator: the request (including retries while establishing
        the stream) only starts when iteration begins. ``last_usage`` is reset
        at that point and filled once a chunk reports usage via ``x_groq``.

        Raises:
            ProviderError: If the stream cannot be established (see
                ``_create_with_retry``) or breaks during iteration.
        """
        log.info("LLM-Anfrage (streaming, Modell: %s, Temperature: %s)", self._model, self._temperature)
        self._last_usage = None

        stream = self._create_with_retry(system_prompt, text, stream=True, label="LLM-Stream")

        # V02: iterate inside try/except so a connection drop mid-stream is
        # reported as ProviderError together with the progress made so far.
        total_chars = 0
        try:
            for chunk in stream:
                # Defensive: a chunk without choices carries no delta; skip it
                # rather than aborting the whole stream with an IndexError.
                delta = chunk.choices[0].delta.content if chunk.choices else None
                if delta:
                    total_chars += len(delta)
                    yield delta

                # Groq streaming: usage arrives via x_groq (in the last chunk,
                # per the original note).
                x_groq = getattr(chunk, "x_groq", None)
                usage = getattr(x_groq, "usage", None) if x_groq else None
                if usage:
                    self._last_usage = {
                        "prompt_tokens": getattr(usage, "prompt_tokens", 0) or 0,
                        "completion_tokens": getattr(usage, "completion_tokens", 0) or 0,
                    }
        except Exception as e:
            # Covers groq.APIError as well; both were handled identically.
            raise ProviderError(
                f"LLM-Stream abgebrochen nach {total_chars} Zeichen: {e}"
            ) from e
        log.info("LLM-Stream abgeschlossen: %d Zeichen", total_chars)
@@ -29,6 +29,7 @@ from paypertranscript.core.window_detector import WindowInfo, get_foreground_win
29
29
  from paypertranscript.pipeline.transcription import (
30
30
  STATUS_DONE,
31
31
  STATUS_ERROR,
32
+ STATUS_LLM_FALLBACK,
32
33
  STATUS_LLM_START,
33
34
  STATUS_STT_START,
34
35
  TranscriptionPipeline,
@@ -76,6 +77,7 @@ class AppSignals(QObject):
76
77
  formatting_started = Signal()
77
78
  processing_done = Signal()
78
79
  processing_error = Signal(str)
80
+ done_message = Signal(str)
79
81
  update_available = Signal(str)
80
82
  update_not_available = Signal()
81
83
 
@@ -144,6 +146,8 @@ class PayPerTranscriptApp:
144
146
  show_done_overlay=self._overlay.show_done,
145
147
  on_update_check=self.trigger_update_check,
146
148
  on_perform_update=self.perform_update_and_restart,
149
+ get_last_wav_path=self._get_last_wav_path,
150
+ on_retranscribe_precise=self._retranscribe_precise,
147
151
  )
148
152
 
149
153
  # Amplitude-Polling-Timer (30fps, laeuft nur waehrend Aufnahme)
@@ -154,6 +158,10 @@ class PayPerTranscriptApp:
154
158
  self._connect_signals()
155
159
  self._tray.show()
156
160
 
161
+ # V04: LLM-Initialisierungs-Warnung als Overlay anzeigen (nach Tray-Erstellung)
162
+ if self._llm_init_warning:
163
+ self._overlay.show_error("LLM deaktiviert")
164
+
157
165
  # Periodischer Update-Check
158
166
  self._update_timer = QTimer()
159
167
  check_hours = self._config.get("updates.check_interval_hours", 24)
@@ -236,6 +244,7 @@ class PayPerTranscriptApp:
236
244
 
237
245
  # LLM-Provider erstellen (optional)
238
246
  self._llm_provider = None
247
+ self._llm_init_warning: str | None = None
239
248
  try:
240
249
  self._llm_provider = create_llm_provider(
241
250
  config.get("api.provider", "groq"),
@@ -247,6 +256,10 @@ class PayPerTranscriptApp:
247
256
  log.warning(
248
257
  "LLM-Provider konnte nicht erstellt werden: %s - LLM-Formatierung deaktiviert", e
249
258
  )
259
+ self._llm_init_warning = (
260
+ "LLM-Formatierung deaktiviert — Rohtext wird eingefügt.\n"
261
+ f"Grund: {e}"
262
+ )
250
263
 
251
264
  # Session-Logger
252
265
  self._session_logger = SessionLogger()
@@ -282,6 +295,58 @@ class PayPerTranscriptApp:
282
295
  """Gibt die letzte Transkription aus der Pipeline zurueck."""
283
296
  return self._pipeline.last_transcription
284
297
 
298
def _get_last_wav_path(self):
    """Return the pipeline's last WAV path if that file still exists, else None."""
    candidate = self._pipeline.last_wav_path
    # Both an unset path and a path whose file is gone yield None.
    return candidate if candidate and candidate.exists() else None
304
+
305
def _retranscribe_precise(self) -> None:
    """Re-transcribe the most recent recording with ``whisper-large-v3``.

    Emits ``processing_error("Keine Aufnahme")`` and returns if no last WAV
    file is available. Otherwise a daemon thread creates a fresh STT provider
    with the ``whisper-large-v3`` model, transcribes the file, copies the
    result to the clipboard, stores it as the pipeline's
    ``last_transcription`` and emits ``processing_done`` followed by
    ``done_message("Kopiert")``. Any failure is reported through
    ``processing_error``; nothing is raised to the caller.
    """
    import threading

    wav_path = self._get_last_wav_path()
    if not wav_path:
        self._signals.processing_error.emit("Keine Aufnahme")
        return

    def _worker():
        # Runs on the background thread; talks to the UI only via signals.
        try:
            # Per the original note, this puts the overlay into its
            # "Transkribiere..." state.
            self._signals.recording_stopped.emit()

            # Stored API key first, environment variable as fallback.
            api_key = load_api_key() or os.environ.get("GROQ_API_KEY")
            stt = create_stt_provider(
                self._config.get("api.provider", "groq"),
                model="whisper-large-v3",
                api_key=api_key,
            )

            language = self._config.get("general.language", "de")
            words = self._config.get("words.misspelled_words", [])
            from paypertranscript.pipeline.transcription import _build_word_list_prompt
            prompt = _build_word_list_prompt(words)

            text = stt.transcribe(wav_path, language=language, prompt=prompt)
            if not text:
                self._signals.processing_error.emit("Kein Text erkannt")
                return

            # The result is only copied to the clipboard here.
            import pyperclip
            pyperclip.copy(text)
            self._pipeline.last_transcription = text

            self._signals.processing_done.emit()
            self._signals.done_message.emit("Kopiert")
        except ProviderError as e:
            log.error("Re-Transkription fehlgeschlagen: %s", e)
            self._signals.processing_error.emit(f"Re-Transkription fehlgeschlagen: {e}")
        except Exception as e:
            # Unexpected errors: full traceback in the log, generic message in the UI.
            log.error("Re-Transkription: Unerwarteter Fehler: %s", e, exc_info=True)
            self._signals.processing_error.emit("Re-Transkription fehlgeschlagen")

    threading.Thread(target=_worker, daemon=True, name="retranscribe-precise").start()
349
+
285
350
  def _connect_signals(self) -> None:
286
351
  """Verbindet AppSignals mit Tray- und Overlay-Slots."""
287
352
  # Tray
@@ -300,6 +365,9 @@ class PayPerTranscriptApp:
300
365
  self._signals.processing_done.connect(self._overlay.show_done)
301
366
  self._signals.processing_error.connect(self._overlay.show_error)
302
367
 
368
+ # Done mit Text (z.B. "Kopiert")
369
+ self._signals.done_message.connect(self._overlay.show_done_message)
370
+
303
371
  # Update
304
372
  self._signals.update_available.connect(self._tray.on_update_available)
305
373
  self._signals.update_not_available.connect(self._tray.on_update_not_available)
@@ -334,7 +402,7 @@ class PayPerTranscriptApp:
334
402
  self._signals.recording_stopped.emit()
335
403
 
336
404
  if audio is None:
337
- self._signals.processing_done.emit()
405
+ self._signals.processing_error.emit("Keine Aufnahme")
338
406
  return
339
407
 
340
408
  actual_duration = len(audio) / 16000
@@ -345,7 +413,7 @@ class PayPerTranscriptApp:
345
413
  MIN_RECORDING_DURATION,
346
414
  )
347
415
  self._current_window = None
348
- self._signals.processing_done.emit()
416
+ self._signals.processing_error.emit("Aufnahme zu kurz")
349
417
  return
350
418
 
351
419
  # WAV speichern
@@ -365,10 +433,6 @@ class PayPerTranscriptApp:
365
433
  "Aufnahme sehr lang (%.0fs) - wird trotzdem gesendet",
366
434
  actual_duration,
367
435
  )
368
- self._tray.show_info(
369
- f"Lange Aufnahme ({actual_duration / 60:.0f} Min). "
370
- "Wird trotzdem gesendet."
371
- )
372
436
 
373
437
  # Pipeline in Hintergrund-Thread starten (non-blocking)
374
438
  self._pipeline.process_async(
@@ -392,6 +456,8 @@ class PayPerTranscriptApp:
392
456
  self._signals.formatting_started.emit()
393
457
  elif status == STATUS_DONE:
394
458
  self._signals.processing_done.emit()
459
+ elif status == STATUS_LLM_FALLBACK:
460
+ log.info("LLM-Fallback auf Rohtext: %s", detail)
395
461
  elif status == STATUS_ERROR:
396
462
  self._signals.processing_error.emit(detail or "Transkription fehlgeschlagen")
397
463
 
@@ -51,6 +51,7 @@ _FPS_INTERVAL = 16 # ~60fps
51
51
  _FADE_IN_MS = 120
52
52
  _FADE_OUT_MS = 200 # Smooth fade-out (nicht abrupt)
53
53
  _DONE_SHOW_MS = 700 # Kurz sichtbar, dann smooth weg
54
+ _DONE_MSG_SHOW_MS = 1200 # Done mit Text: laenger sichtbar damit lesbar
54
55
  _ERROR_SHOW_MS = 2500
55
56
 
56
57
  # -- Visualizer --
@@ -76,6 +77,7 @@ class StatusOverlay(QWidget):
76
77
  TRANSCRIBING = "transcribing"
77
78
  FORMATTING = "formatting"
78
79
  DONE = "done"
80
+ DONE_MESSAGE = "done_message"
79
81
  ERROR = "error"
80
82
 
81
83
  def __init__(self, config: ConfigManager, parent: QWidget | None = None) -> None:
@@ -83,6 +85,7 @@ class StatusOverlay(QWidget):
83
85
  self._config = config
84
86
  self._state: str | None = None
85
87
  self._error_message = ""
88
+ self._done_message = ""
86
89
 
87
90
  # Animation
88
91
  self._tick = 0
@@ -134,6 +137,12 @@ class StatusOverlay(QWidget):
134
137
  self._switch_state(self.DONE)
135
138
  self._hide_timer.start(_DONE_SHOW_MS)
136
139
 
140
@Slot(str)
def show_done_message(self, message: str) -> None:
    """Show the "done" state together with a short text message.

    Stores ``message``, switches the overlay to the ``DONE_MESSAGE`` state and
    starts the hide timer with ``_DONE_MSG_SHOW_MS`` milliseconds.

    Args:
        message: Text to display next to the checkmark (e.g. "Kopiert").
    """
    # Set the text before switching state so it is in place when the new
    # state is drawn.
    self._done_message = message
    self._switch_state(self.DONE_MESSAGE)
    self._hide_timer.start(_DONE_MSG_SHOW_MS)
145
+
137
146
  @Slot(str)
138
147
  def show_error(self, message: str) -> None:
139
148
  self._error_message = message
@@ -263,13 +272,15 @@ class StatusOverlay(QWidget):
263
272
  self._draw_processing(p)
264
273
  elif self._state == self.DONE:
265
274
  self._draw_done(p)
275
+ elif self._state == self.DONE_MESSAGE:
276
+ self._draw_done_message(p)
266
277
  elif self._state == self.ERROR:
267
278
  self._draw_error(p)
268
279
 
269
280
  p.end()
270
281
 
271
282
  def _accent_color(self) -> QColor:
272
- if self._state == self.DONE:
283
+ if self._state in (self.DONE, self.DONE_MESSAGE):
273
284
  return _GREEN
274
285
  if self._state == self.ERROR:
275
286
  return _RED
@@ -365,6 +376,37 @@ class StatusOverlay(QWidget):
365
376
  p.drawLine(int(cx - 6), int(cy), int(cx - 2), int(cy + 5))
366
377
  p.drawLine(int(cx - 2), int(cy + 5), int(cx + 7), int(cy - 4))
367
378
 
379
def _draw_done_message(self, p: QPainter) -> None:
    """Paint a green check mark followed by a short text (e.g. 'Kopiert').

    Args:
        p: Painter that receives the drawing calls.
    """
    icon_x = 24
    mid_y = _HEIGHT / 2

    # Halo behind the icon: faint green in the centre, fully transparent
    # at the outer edge of the gradient.
    halo_color = QColor(_GREEN)
    halo_color.setAlpha(30)
    halo = QRadialGradient(icon_x, mid_y, 12)
    halo.setColorAt(0.0, halo_color)
    halo.setColorAt(1.0, QColor(0, 0, 0, 0))
    p.setPen(Qt.PenStyle.NoPen)
    p.setBrush(halo)
    halo_rect = QRectF(icon_x - 12, mid_y - 12, 24, 24)
    p.drawEllipse(halo_rect)

    # Check mark: two strokes that meet at a shared lower vertex.
    stroke = QPen(
        _GREEN,
        2.0,
        Qt.PenStyle.SolidLine,
        Qt.PenCapStyle.RoundCap,
        Qt.PenJoinStyle.RoundJoin,
    )
    p.setPen(stroke)
    knee_x = int(icon_x - 2)
    knee_y = int(mid_y + 5)
    p.drawLine(int(icon_x - 6), int(mid_y), knee_x, knee_y)
    p.drawLine(knee_x, knee_y, int(icon_x + 7), int(mid_y - 4))

    # Label to the right of the icon; falls back to "OK" when no text is set.
    label_font = QFont("Segoe UI", 8)
    label_font.setWeight(QFont.Weight.Medium)
    p.setFont(label_font)
    p.setPen(QPen(_TEXT_PRIMARY))
    label = self._done_message if self._done_message else "OK"
    label_rect = QRectF(icon_x + 14, 0, _WIDTH - icon_x - 24, _HEIGHT)
    label_flags = Qt.AlignmentFlag.AlignVCenter | Qt.TextFlag.TextSingleLine
    p.drawText(label_rect, label_flags, label)
409
+
368
410
  def _draw_error(self, p: QPainter) -> None:
369
411
  """Rotes X + Fehlermeldung."""
370
412
  ix = 24
@@ -178,6 +178,8 @@ class SystemTray:
178
178
  show_done_overlay: Callable[[], None] | None = None,
179
179
  on_update_check: Callable[[], None] | None = None,
180
180
  on_perform_update: Callable[[], None] | None = None,
181
+ get_last_wav_path: Callable | None = None,
182
+ on_retranscribe_precise: Callable[[], None] | None = None,
181
183
  parent: QWidget | None = None,
182
184
  ) -> None:
183
185
  self._config = config
@@ -187,6 +189,8 @@ class SystemTray:
187
189
  self._show_done_overlay = show_done_overlay
188
190
  self._on_update_check = on_update_check
189
191
  self._on_perform_update = on_perform_update
192
+ self._get_last_wav_path = get_last_wav_path
193
+ self._on_retranscribe_precise = on_retranscribe_precise
190
194
  self._icons = create_tray_icons()
191
195
 
192
196
  # MainWindow (lazy creation)
@@ -207,15 +211,6 @@ class SystemTray:
207
211
  self._tray.show()
208
212
  log.info("System Tray angezeigt")
209
213
 
210
def show_info(self, message: str) -> None:
    """Show an informational notification from the system tray icon.

    Args:
        message: Body text of the notification.
    """
    title = "PayPerTranscript"
    info_icon = QSystemTrayIcon.MessageIcon.Information
    timeout_hint_ms = 3000  # fourth positional argument of showMessage
    self._tray.showMessage(title, message, info_icon, timeout_hint_ms)
218
-
219
214
  def hide(self) -> None:
220
215
  """Versteckt das Tray-Icon."""
221
216
  self._tray.hide()
@@ -247,6 +242,8 @@ class SystemTray:
247
242
  self._tray.setToolTip(self._build_tooltip())
248
243
  if self._get_last_transcription and self._get_last_transcription():
249
244
  self._act_copy_last.setEnabled(True)
245
+ if self._get_last_wav_path and self._get_last_wav_path():
246
+ self._act_retranscribe.setEnabled(True)
250
247
 
251
248
  @Slot(str)
252
249
  def on_processing_error(self, message: str) -> None:
@@ -271,6 +268,10 @@ class SystemTray:
271
268
  self._act_copy_last.triggered.connect(self._on_copy_last_transcription)
272
269
  self._act_copy_last.setEnabled(False)
273
270
 
271
+ self._act_retranscribe = self._menu.addAction("Erneut transkribieren (Pr\u00e4zise)")
272
+ self._act_retranscribe.triggered.connect(self._on_retranscribe)
273
+ self._act_retranscribe.setEnabled(False)
274
+
274
275
  self._menu.addSeparator()
275
276
 
276
277
  act_quit = self._menu.addAction("Beenden")
@@ -321,6 +322,11 @@ class SystemTray:
321
322
  else:
322
323
  log.info("Keine Transkription zum Kopieren vorhanden")
323
324
 
325
+ def _on_retranscribe(self) -> None:
326
+ """Startet Re-Transkription mit praezisem Modell."""
327
+ if self._on_retranscribe_precise:
328
+ self._on_retranscribe_precise()
329
+
324
330
  # -- Update-Callbacks --
325
331
 
326
332
  def _get_update_dialog(self) -> _UpdateInfoDialog:
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "PayPerTranscript"
7
- version = "0.2.9"
7
+ version = "0.3.0"
8
8
  description = "Open-Source Voice-to-Text mit Pay-per-Use Pricing"
9
9
  license = "MIT"
10
10
  requires-python = ">=3.12"
@@ -1,193 +0,0 @@
1
- """GroqCloud API-Provider für PayPerTranscript.
2
-
3
- Implementiert STT (Whisper) und LLM-Formatierung über die GroqCloud API.
4
- """
5
-
6
- from collections.abc import Iterator
7
- from pathlib import Path
8
-
9
- import groq
10
-
11
- from paypertranscript.core.logging import get_logger
12
- from paypertranscript.providers.base import AbstractLLMProvider, AbstractSTTProvider, ProviderError
13
-
14
- log = get_logger("providers.groq")
15
-
16
-
17
class GroqSTTProvider(AbstractSTTProvider):
    """GroqCloud Whisper speech-to-text provider.

    Uses ``whisper-large-v3-turbo`` unless another model is given. The Groq
    client is instantiated once and reused for every request (connection
    pooling via httpx).
    """

    def __init__(
        self,
        api_key: str | None = None,
        model: str = "whisper-large-v3-turbo",
    ) -> None:
        """Create the Groq client used for all transcription requests.

        Args:
            api_key: Key forwarded unchanged to ``groq.Groq``.
            model: Name of the transcription model to request.

        Raises:
            ProviderError: If ``groq.Groq`` raises ``groq.GroqError``.
        """
        self._model = model
        try:
            self._client = groq.Groq(api_key=api_key)
        except groq.GroqError as e:
            raise ProviderError(f"Groq-Client konnte nicht erstellt werden: {e}") from e
        log.info("GroqSTTProvider initialisiert (Modell: %s)", self._model)

    def transcribe(self, audio_path: Path, language: str, prompt: str = "") -> str:
        """Transcribe a WAV file through the GroqCloud Whisper API.

        Args:
            audio_path: Path of the audio file to upload.
            language: Language value passed to the API.
            prompt: Optional prompt passed to the API; logged when non-empty.

        Returns:
            The transcribed text with surrounding whitespace stripped.

        Raises:
            ProviderError: If the file does not exist or the Groq SDK reports
                an authentication, rate-limit, timeout, connection or other
                API error.
        """
        if not audio_path.exists():
            raise ProviderError(f"Audio-Datei nicht gefunden: {audio_path}")

        log.info(
            "STT-Anfrage: %s (Sprache: %s, Modell: %s)",
            audio_path.name,
            language,
            self._model,
        )
        if prompt:
            log.info("STT-Prompt: %s", prompt)

        try:
            with open(audio_path, "rb") as audio_file:
                transcription = self._client.audio.transcriptions.create(
                    model=self._model,
                    file=audio_file,
                    language=language,
                    prompt=prompt,
                    response_format="text",
                    temperature=0.0,
                )
        except groq.AuthenticationError as e:
            raise ProviderError(f"API-Key ungültig: {e}") from e
        except groq.RateLimitError as e:
            raise ProviderError(f"Rate Limit erreicht: {e}") from e
        # APITimeoutError must be handled before APIConnectionError: it is a
        # subclass of APIConnectionError in the Groq SDK, so the reverse order
        # makes the timeout branch unreachable and reports timeouts as
        # connection failures.
        except groq.APITimeoutError as e:
            raise ProviderError(f"GroqCloud Timeout: {e}") from e
        except groq.APIConnectionError as e:
            raise ProviderError(f"Keine Verbindung zu GroqCloud: {e}") from e
        except groq.APIError as e:
            raise ProviderError(f"GroqCloud API-Fehler: {e}") from e

        # With response_format="text" the result is expected to be a plain
        # string; otherwise fall back to the object's .text attribute.
        if isinstance(transcription, str):
            text = transcription.strip()
        else:
            text = transcription.text.strip()

        log.info("STT-Ergebnis: %d Zeichen", len(text))
        return text
77
-
78
-
79
class GroqLLMProvider(AbstractLLMProvider):
    """GroqCloud LLM provider for text formatting.

    Uses ``openai/gpt-oss-20b`` unless another model is given. The Groq
    client is instantiated once and reused.
    """

    def __init__(
        self,
        api_key: str | None = None,
        model: str = "openai/gpt-oss-20b",
        temperature: float | None = None,
    ) -> None:
        """Create the Groq client used for all formatting requests.

        Args:
            api_key: Key forwarded unchanged to ``groq.Groq``.
            model: Name of the chat model to request.
            temperature: Sampling temperature; when ``None`` the parameter is
                omitted from the request.

        Raises:
            ProviderError: If ``groq.Groq`` raises ``groq.GroqError``.
        """
        self._model = model
        self._temperature = temperature
        self._last_usage: dict[str, int] | None = None
        try:
            self._client = groq.Groq(api_key=api_key)
        except groq.GroqError as e:
            raise ProviderError(f"Groq-Client konnte nicht erstellt werden: {e}") from e
        log.info("GroqLLMProvider initialisiert (Modell: %s, Temperature: %s)", self._model, self._temperature)

    @property
    def last_usage(self) -> dict[str, int] | None:
        """Token usage of the most recent LLM request, or ``None`` if unknown."""
        return self._last_usage

    def _build_messages(
        self, system_prompt: str, text: str
    ) -> list[dict[str, str]]:
        """Return the system/user message pair; the text is wrapped in <transcript> tags."""
        return [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f"<transcript>{text}</transcript>"},
        ]

    def _completion_kwargs(self) -> dict:
        """Build the keyword arguments shared by both chat.completions.create calls."""
        kwargs: dict = {}
        if self._temperature is not None:
            kwargs["temperature"] = self._temperature
        return kwargs

    def format_text(self, system_prompt: str, text: str) -> str:
        """Format ``text`` with a single non-streaming chat completion.

        Args:
            system_prompt: Content of the system message.
            text: Transcript placed in the user message.

        Returns:
            The stripped content of the first choice ("" if it has none).

        Raises:
            ProviderError: If the Groq SDK reports an authentication,
                rate-limit, timeout, connection or other API error.
        """
        log.info("LLM-Anfrage (non-streaming, Modell: %s, Temperature: %s)", self._model, self._temperature)
        self._last_usage = None
        try:
            response = self._client.chat.completions.create(
                model=self._model,
                messages=self._build_messages(system_prompt, text),
                stream=False,
                **self._completion_kwargs(),
            )
        except groq.AuthenticationError as e:
            raise ProviderError(f"API-Key ungültig: {e}") from e
        except groq.RateLimitError as e:
            raise ProviderError(f"Rate Limit erreicht: {e}") from e
        # APITimeoutError is a subclass of APIConnectionError in the Groq SDK,
        # so it has to be caught first or its branch can never run.
        except groq.APITimeoutError as e:
            raise ProviderError(f"GroqCloud Timeout: {e}") from e
        except groq.APIConnectionError as e:
            raise ProviderError(f"Keine Verbindung zu GroqCloud: {e}") from e
        except groq.APIError as e:
            raise ProviderError(f"GroqCloud API-Fehler: {e}") from e

        # Record usage data when the response carries it.
        if hasattr(response, "usage") and response.usage:
            self._last_usage = {
                "prompt_tokens": response.usage.prompt_tokens or 0,
                "completion_tokens": response.usage.completion_tokens or 0,
            }

        result = response.choices[0].message.content or ""
        result = result.strip()
        log.info("LLM-Ergebnis: %d Zeichen", len(result))
        return result

    def format_text_stream(self, system_prompt: str, text: str) -> Iterator[str]:
        """Format ``text`` with a streaming chat completion.

        This is a generator: the request is only sent once iteration starts.

        Args:
            system_prompt: Content of the system message.
            text: Transcript placed in the user message.

        Yields:
            Each non-empty content delta in arrival order.

        Raises:
            ProviderError: If creating the stream fails with an
                authentication, rate-limit, timeout, connection or other
                API error from the Groq SDK.
        """
        log.info("LLM-Anfrage (streaming, Modell: %s, Temperature: %s)", self._model, self._temperature)
        self._last_usage = None
        try:
            stream = self._client.chat.completions.create(
                model=self._model,
                messages=self._build_messages(system_prompt, text),
                stream=True,
                **self._completion_kwargs(),
            )
        except groq.AuthenticationError as e:
            raise ProviderError(f"API-Key ungültig: {e}") from e
        except groq.RateLimitError as e:
            raise ProviderError(f"Rate Limit erreicht: {e}") from e
        # APITimeoutError is a subclass of APIConnectionError in the Groq SDK,
        # so it has to be caught first or its branch can never run.
        except groq.APITimeoutError as e:
            raise ProviderError(f"GroqCloud Timeout: {e}") from e
        except groq.APIConnectionError as e:
            raise ProviderError(f"Keine Verbindung zu GroqCloud: {e}") from e
        except groq.APIError as e:
            raise ProviderError(f"GroqCloud API-Fehler: {e}") from e

        total_chars = 0
        for chunk in stream:
            # Defensive guard: a chunk without choices would otherwise raise
            # IndexError. NOTE(review): not confirmed that Groq emits such
            # chunks; the guard is harmless if it never does.
            choices = chunk.choices
            delta = choices[0].delta.content if choices else None
            if delta:
                total_chars += len(delta)
                yield delta
            # Groq streaming: usage arrives in the last chunk via x_groq.
            if (
                hasattr(chunk, "x_groq")
                and chunk.x_groq
                and hasattr(chunk.x_groq, "usage")
                and chunk.x_groq.usage
            ):
                usage = chunk.x_groq.usage
                self._last_usage = {
                    "prompt_tokens": getattr(usage, "prompt_tokens", 0) or 0,
                    "completion_tokens": getattr(usage, "completion_tokens", 0) or 0,
                }
        log.info("LLM-Stream abgeschlossen: %d Zeichen", total_chars)