PayPerTranscript 0.3.0__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/PKG-INFO +2 -1
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/PayPerTranscript.egg-info/PKG-INFO +2 -1
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/PayPerTranscript.egg-info/SOURCES.txt +1 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/README.md +1 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/__init__.py +1 -1
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/core/config.py +28 -8
- paypertranscript-0.3.1/paypertranscript/core/context_detector.py +255 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/core/hotkey.py +21 -6
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/pipeline/transcription.py +62 -1
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/ui/app.py +15 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/pyproject.toml +1 -1
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/LICENSE +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/PayPerTranscript.egg-info/dependency_links.txt +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/PayPerTranscript.egg-info/entry_points.txt +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/PayPerTranscript.egg-info/requires.txt +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/PayPerTranscript.egg-info/top_level.txt +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/__main__.py +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/assets/icons/app.ico +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/assets/icons/app.png +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/assets/icons/app_big.png +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/assets/icons/arrow_down.svg +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/assets/icons/tray.png +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/assets/icons/tray_green.png +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/assets/icons/tray_orange.png +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/assets/sounds/start.wav +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/assets/sounds/stop.wav +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/assets/styles/dark.qss +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/core/__init__.py +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/core/audio_manager.py +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/core/cost_tracker.py +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/core/logging.py +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/core/paths.py +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/core/recorder.py +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/core/session_logger.py +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/core/text_inserter.py +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/core/updater.py +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/core/window_detector.py +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/pipeline/__init__.py +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/providers/__init__.py +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/providers/base.py +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/providers/groq_provider.py +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/ui/__init__.py +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/ui/animated.py +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/ui/constants.py +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/ui/main_window.py +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/ui/overlay.py +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/ui/pages/__init__.py +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/ui/pages/home_page.py +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/ui/pages/settings_page.py +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/ui/pages/statistics_page.py +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/ui/pages/window_mapping_page.py +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/ui/pages/word_list_page.py +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/ui/setup_wizard.py +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/ui/sidebar.py +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/ui/tray.py +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/ui/widgets.py +0 -0
- {paypertranscript-0.3.0 → paypertranscript-0.3.1}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: PayPerTranscript
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: Open-Source Voice-to-Text mit Pay-per-Use Pricing
|
|
5
5
|
Author: PayPerTranscript Contributors
|
|
6
6
|
License-Expression: MIT
|
|
@@ -65,6 +65,7 @@ Kommerzielle Voice-to-Text Dienste kosten **$12-15/Monat** - egal ob du sie 5 Mi
|
|
|
65
65
|
- **Hold-to-Record**: `Ctrl+Win` halten - sprechen - loslassen - fertig
|
|
66
66
|
- **Blitzschnell**: 30 Sekunden Audio = 0.14 Sekunden Transkription (via Groq Whisper)
|
|
67
67
|
- **Smart Formatting**: WhatsApp bekommt lockere Texte, Outlook professionelle E-Mails
|
|
68
|
+
- **Kontext-Erkennung**: Markierten Text im aktiven Fenster erkennen - das LLM nutzt ihn für korrekte Schreibweisen und Bezüge
|
|
68
69
|
- **Wortliste**: Eigene Namen und Fachbegriffe werden immer korrekt geschrieben
|
|
69
70
|
|
|
70
71
|
### 📊 Transparenz & Kontrolle
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: PayPerTranscript
|
|
3
|
-
Version: 0.3.0
|
|
3
|
+
Version: 0.3.1
|
|
4
4
|
Summary: Open-Source Voice-to-Text mit Pay-per-Use Pricing
|
|
5
5
|
Author: PayPerTranscript Contributors
|
|
6
6
|
License-Expression: MIT
|
|
@@ -65,6 +65,7 @@ Kommerzielle Voice-to-Text Dienste kosten **$12-15/Monat** - egal ob du sie 5 Mi
|
|
|
65
65
|
- **Hold-to-Record**: `Ctrl+Win` halten - sprechen - loslassen - fertig
|
|
66
66
|
- **Blitzschnell**: 30 Sekunden Audio = 0.14 Sekunden Transkription (via Groq Whisper)
|
|
67
67
|
- **Smart Formatting**: WhatsApp bekommt lockere Texte, Outlook professionelle E-Mails
|
|
68
|
+
- **Kontext-Erkennung**: Markierten Text im aktiven Fenster erkennen - das LLM nutzt ihn für korrekte Schreibweisen und Bezüge
|
|
68
69
|
- **Wortliste**: Eigene Namen und Fachbegriffe werden immer korrekt geschrieben
|
|
69
70
|
|
|
70
71
|
### 📊 Transparenz & Kontrolle
|
|
@@ -22,6 +22,7 @@ paypertranscript/assets/styles/dark.qss
|
|
|
22
22
|
paypertranscript/core/__init__.py
|
|
23
23
|
paypertranscript/core/audio_manager.py
|
|
24
24
|
paypertranscript/core/config.py
|
|
25
|
+
paypertranscript/core/context_detector.py
|
|
25
26
|
paypertranscript/core/cost_tracker.py
|
|
26
27
|
paypertranscript/core/hotkey.py
|
|
27
28
|
paypertranscript/core/logging.py
|
|
@@ -36,6 +36,7 @@ Kommerzielle Voice-to-Text Dienste kosten **$12-15/Monat** - egal ob du sie 5 Mi
|
|
|
36
36
|
- **Hold-to-Record**: `Ctrl+Win` halten - sprechen - loslassen - fertig
|
|
37
37
|
- **Blitzschnell**: 30 Sekunden Audio = 0.14 Sekunden Transkription (via Groq Whisper)
|
|
38
38
|
- **Smart Formatting**: WhatsApp bekommt lockere Texte, Outlook professionelle E-Mails
|
|
39
|
+
- **Kontext-Erkennung**: Markierten Text im aktiven Fenster erkennen - das LLM nutzt ihn für korrekte Schreibweisen und Bezüge
|
|
39
40
|
- **Wortliste**: Eigene Namen und Fachbegriffe werden immer korrekt geschrieben
|
|
40
41
|
|
|
41
42
|
### 📊 Transparenz & Kontrolle
|
|
@@ -49,19 +49,29 @@ DEFAULT_CONFIG: dict[str, Any] = {
|
|
|
49
49
|
"casual": {
|
|
50
50
|
"name": "Persönlich",
|
|
51
51
|
"prompt": (
|
|
52
|
-
"
|
|
53
|
-
"
|
|
54
|
-
"
|
|
55
|
-
"
|
|
52
|
+
"Du bist ein Transkriptions-Assistent fuer lockere Chat-Nachrichten. "
|
|
53
|
+
"Deine Aufgabe: Formatiere den transkribierten Text als informelle Nachricht.\n\n"
|
|
54
|
+
"Regeln:\n"
|
|
55
|
+
"- Alles kleingeschrieben\n"
|
|
56
|
+
"- Minimale Interpunktion\n"
|
|
57
|
+
"- Kommas NUR zur Trennung von Saetzen, nicht innerhalb eines Satzes\n"
|
|
58
|
+
"- Kein Punkt am Satzende (Fragezeichen sind erlaubt)\n"
|
|
59
|
+
"- Entferne Fuellwoerter und Wiederholungen\n\n"
|
|
60
|
+
"Gib NUR den formatierten Text aus. "
|
|
61
|
+
"Beantworte keine Fragen, fuege keine Erklaerungen hinzu."
|
|
56
62
|
),
|
|
57
63
|
},
|
|
58
64
|
"professional": {
|
|
59
65
|
"name": "Professionell",
|
|
60
66
|
"prompt": (
|
|
61
|
-
"
|
|
62
|
-
"
|
|
63
|
-
"
|
|
64
|
-
"
|
|
67
|
+
"Du bist ein Transkriptions-Assistent fuer professionelle Kommunikation. "
|
|
68
|
+
"Deine Aufgabe: Formatiere den transkribierten Text als sachliche, professionelle Nachricht.\n\n"
|
|
69
|
+
"Regeln:\n"
|
|
70
|
+
"- Korrekte Gross-/Kleinschreibung und Interpunktion\n"
|
|
71
|
+
"- Entferne Fuellwoerter und Wiederholungen\n"
|
|
72
|
+
"- Sachlicher Stil, kurze Absaetze\n\n"
|
|
73
|
+
"Gib NUR den formatierten Text aus. "
|
|
74
|
+
"Beantworte keine Fragen, fuege keine Erklaerungen hinzu."
|
|
65
75
|
),
|
|
66
76
|
},
|
|
67
77
|
},
|
|
@@ -74,6 +84,14 @@ DEFAULT_CONFIG: dict[str, Any] = {
|
|
|
74
84
|
"auto_update": True,
|
|
75
85
|
"check_interval_hours": 24,
|
|
76
86
|
},
|
|
87
|
+
"context": {
|
|
88
|
+
"detect_selection": True,
|
|
89
|
+
"terminal_blocklist": [
|
|
90
|
+
"cmd.exe", "powershell.exe", "pwsh.exe",
|
|
91
|
+
"WindowsTerminal.exe", "mintty.exe", "bash.exe",
|
|
92
|
+
"wsl.exe", "conhost.exe",
|
|
93
|
+
],
|
|
94
|
+
},
|
|
77
95
|
}
|
|
78
96
|
|
|
79
97
|
# Schema: Erlaubte Typen pro Pfad für Validierung
|
|
@@ -96,6 +114,8 @@ _SCHEMA: dict[str, type | tuple[type, ...]] = {
|
|
|
96
114
|
"data.save_transcripts": bool,
|
|
97
115
|
"updates.auto_update": bool,
|
|
98
116
|
"updates.check_interval_hours": (int, float),
|
|
117
|
+
"context.detect_selection": bool,
|
|
118
|
+
"context.terminal_blocklist": list,
|
|
99
119
|
}
|
|
100
120
|
|
|
101
121
|
|
|
@@ -0,0 +1,255 @@
|
|
|
1
|
+
"""Kontext-Erkennung fuer PayPerTranscript.
|
|
2
|
+
|
|
3
|
+
Prueft ob im aktiven Fenster Text markiert ist (via Clipboard-Sentinel + Ctrl+C).
|
|
4
|
+
Der erkannte Text wird dem LLM als Kontext mitgegeben, z.B. fuer Antworten auf E-Mails.
|
|
5
|
+
|
|
6
|
+
Die Erkennung laeuft parallel zum STT-API-Call und fuegt 0ms zusaetzliche Latenz hinzu.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import ctypes
|
|
12
|
+
import threading
|
|
13
|
+
import time
|
|
14
|
+
from concurrent.futures import Future
|
|
15
|
+
|
|
16
|
+
import pyautogui
|
|
17
|
+
import pyperclip
|
|
18
|
+
|
|
19
|
+
from paypertranscript.core.config import ConfigManager
|
|
20
|
+
from paypertranscript.core.logging import get_logger
|
|
21
|
+
from paypertranscript.core.window_detector import WindowInfo
|
|
22
|
+
|
|
23
|
+
log = get_logger("core.context_detector")
|
|
24
|
+
|
|
25
|
+
# Sentinel: Null-Bytes koennen nicht in normalem Clipboard-Text vorkommen
|
|
26
|
+
_SENTINEL = "\x00__PPT_SENTINEL__\x00"
|
|
27
|
+
|
|
28
|
+
# Wartezeit nach Ctrl+C bevor Clipboard gelesen wird (ms)
|
|
29
|
+
_CLIPBOARD_WAIT_MS = 80
|
|
30
|
+
|
|
31
|
+
# Timeout fuer Modifier-Release-Wait (ms)
|
|
32
|
+
_MODIFIER_RELEASE_TIMEOUT_MS = 400
|
|
33
|
+
|
|
34
|
+
# Virtual-Key-Codes fuer Modifier-Keys (Win32)
|
|
35
|
+
_VK_MODIFIERS = (
|
|
36
|
+
0x10, # VK_SHIFT
|
|
37
|
+
0x11, # VK_CONTROL
|
|
38
|
+
0x12, # VK_MENU (Alt)
|
|
39
|
+
0x5B, # VK_LWIN
|
|
40
|
+
0x5C, # VK_RWIN
|
|
41
|
+
)
|
|
42
|
+
|
|
43
|
+
# Terminal-Prozesse in denen Ctrl+C nicht gesendet werden darf (SIGINT-Gefahr)
|
|
44
|
+
_DEFAULT_TERMINAL_BLOCKLIST = frozenset({
|
|
45
|
+
"cmd.exe", "powershell.exe", "pwsh.exe",
|
|
46
|
+
"windowsterminal.exe", "mintty.exe", "bash.exe",
|
|
47
|
+
"wsl.exe", "conhost.exe", "alacritty.exe",
|
|
48
|
+
"wezterm-gui.exe", "hyper.exe",
|
|
49
|
+
})
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def detect_selected_text(
|
|
53
|
+
window: WindowInfo | None,
|
|
54
|
+
config: ConfigManager,
|
|
55
|
+
cancel_event: threading.Event | None = None,
|
|
56
|
+
) -> str:
|
|
57
|
+
"""Prueft ob im aktiven Fenster Text markiert ist und gibt ihn zurueck.
|
|
58
|
+
|
|
59
|
+
Ablauf:
|
|
60
|
+
1. Feature-Flag und Terminal-Blocklist pruefen
|
|
61
|
+
2. Clipboard sichern → Sentinel setzen → Ctrl+C → Clipboard lesen → wiederherstellen
|
|
62
|
+
3. Wenn Clipboard != Sentinel: markierter Text gefunden
|
|
63
|
+
|
|
64
|
+
Gibt in ALLEN Fehler-/Abbruch-Faellen "" zurueck — wirft nie Exceptions.
|
|
65
|
+
Die Pipeline wird dadurch nie blockiert oder gestoert.
|
|
66
|
+
|
|
67
|
+
Args:
|
|
68
|
+
window: Info ueber das aktive Fenster (fuer Blocklist-Check).
|
|
69
|
+
config: ConfigManager-Instanz.
|
|
70
|
+
cancel_event: Optionales Event zum Abbrechen der Erkennung.
|
|
71
|
+
|
|
72
|
+
Returns:
|
|
73
|
+
Markierter Text oder "" wenn nichts markiert / Fehler / deaktiviert.
|
|
74
|
+
"""
|
|
75
|
+
t_start = time.perf_counter()
|
|
76
|
+
|
|
77
|
+
try:
|
|
78
|
+
# 1. Feature-Flag pruefen
|
|
79
|
+
if not config.get("context.detect_selection", True):
|
|
80
|
+
log.debug("Context detection disabled by config")
|
|
81
|
+
return ""
|
|
82
|
+
|
|
83
|
+
# 2. Abbruch pruefen
|
|
84
|
+
if cancel_event and cancel_event.is_set():
|
|
85
|
+
log.debug("Context detection cancelled before start")
|
|
86
|
+
return ""
|
|
87
|
+
|
|
88
|
+
# 3. Terminal-Blocklist pruefen
|
|
89
|
+
if window and window.process_name:
|
|
90
|
+
process_lower = window.process_name.lower()
|
|
91
|
+
blocklist = config.get("context.terminal_blocklist", [])
|
|
92
|
+
blocklist_lower = {p.lower() for p in blocklist}
|
|
93
|
+
# Auch Default-Blocklist pruefen
|
|
94
|
+
blocklist_lower.update(p.lower() for p in _DEFAULT_TERMINAL_BLOCKLIST)
|
|
95
|
+
|
|
96
|
+
if process_lower in blocklist_lower:
|
|
97
|
+
log.debug(
|
|
98
|
+
"Context detection skipped: terminal process '%s'",
|
|
99
|
+
window.process_name,
|
|
100
|
+
)
|
|
101
|
+
return ""
|
|
102
|
+
|
|
103
|
+
log.debug(
|
|
104
|
+
"Context detection started for window '%s'",
|
|
105
|
+
window.process_name,
|
|
106
|
+
)
|
|
107
|
+
else:
|
|
108
|
+
log.debug("Context detection started (no window info)")
|
|
109
|
+
|
|
110
|
+
# 4. Clipboard sichern
|
|
111
|
+
try:
|
|
112
|
+
original_clipboard = pyperclip.paste()
|
|
113
|
+
except Exception:
|
|
114
|
+
original_clipboard = ""
|
|
115
|
+
log.debug("Clipboard backed up (%d chars)", len(original_clipboard))
|
|
116
|
+
|
|
117
|
+
# 5. Sentinel auf Clipboard setzen
|
|
118
|
+
try:
|
|
119
|
+
pyperclip.copy(_SENTINEL)
|
|
120
|
+
except Exception as e:
|
|
121
|
+
log.warning("Context detection: clipboard write failed: %s", e)
|
|
122
|
+
return ""
|
|
123
|
+
log.debug("Sentinel placed on clipboard")
|
|
124
|
+
|
|
125
|
+
# 6. Abbruch pruefen
|
|
126
|
+
if cancel_event and cancel_event.is_set():
|
|
127
|
+
_restore_clipboard(original_clipboard)
|
|
128
|
+
log.debug("Context detection cancelled before Ctrl+C")
|
|
129
|
+
return ""
|
|
130
|
+
|
|
131
|
+
# 7. Warten bis Modifier-Keys losgelassen sind (noetig fuer Toggle-Hotkey:
|
|
132
|
+
# User haelt noch Ctrl+Alt → Ctrl+C wuerde als Ctrl+Alt+C ankommen)
|
|
133
|
+
_wait_for_modifiers_released()
|
|
134
|
+
|
|
135
|
+
# 8. Ctrl+C senden
|
|
136
|
+
pyautogui.hotkey("ctrl", "c")
|
|
137
|
+
t_ctrlc = time.perf_counter()
|
|
138
|
+
log.debug("Ctrl+C sent (%.1fms after start)", (t_ctrlc - t_start) * 1000)
|
|
139
|
+
|
|
140
|
+
# 9. Warten bis Clipboard aktualisiert
|
|
141
|
+
time.sleep(_CLIPBOARD_WAIT_MS / 1000)
|
|
142
|
+
|
|
143
|
+
# 10. Clipboard lesen
|
|
144
|
+
try:
|
|
145
|
+
clipboard_content = pyperclip.paste()
|
|
146
|
+
except Exception as e:
|
|
147
|
+
log.warning("Context detection: clipboard read failed: %s", e)
|
|
148
|
+
_restore_clipboard(original_clipboard)
|
|
149
|
+
return ""
|
|
150
|
+
|
|
151
|
+
t_read = time.perf_counter()
|
|
152
|
+
log.debug(
|
|
153
|
+
"Clipboard read after Ctrl+C (%.1fms after start)",
|
|
154
|
+
(t_read - t_start) * 1000,
|
|
155
|
+
)
|
|
156
|
+
|
|
157
|
+
# 11. Auswerten: Hat Ctrl+C den Sentinel ueberschrieben?
|
|
158
|
+
if clipboard_content == _SENTINEL:
|
|
159
|
+
# Sentinel unveraendert → nichts war markiert
|
|
160
|
+
_restore_clipboard(original_clipboard)
|
|
161
|
+
t_end = time.perf_counter()
|
|
162
|
+
log.debug("No text selected (%.1fms total)", (t_end - t_start) * 1000)
|
|
163
|
+
return ""
|
|
164
|
+
|
|
165
|
+
# Text war markiert!
|
|
166
|
+
selected_text = clipboard_content.strip()
|
|
167
|
+
|
|
168
|
+
# 12. Original-Clipboard wiederherstellen
|
|
169
|
+
_restore_clipboard(original_clipboard)
|
|
170
|
+
|
|
171
|
+
t_end = time.perf_counter()
|
|
172
|
+
if selected_text:
|
|
173
|
+
log.info(
|
|
174
|
+
"Selected text detected: %d chars (%.1fms total)",
|
|
175
|
+
len(selected_text),
|
|
176
|
+
(t_end - t_start) * 1000,
|
|
177
|
+
)
|
|
178
|
+
else:
|
|
179
|
+
log.debug("No text selected (empty after strip, %.1fms total)", (t_end - t_start) * 1000)
|
|
180
|
+
|
|
181
|
+
return selected_text
|
|
182
|
+
|
|
183
|
+
except Exception as e:
|
|
184
|
+
t_end = time.perf_counter()
|
|
185
|
+
log.warning(
|
|
186
|
+
"Context detection failed (%.1fms): %s",
|
|
187
|
+
(t_end - t_start) * 1000,
|
|
188
|
+
e,
|
|
189
|
+
)
|
|
190
|
+
return ""
|
|
191
|
+
|
|
192
|
+
|
|
193
|
+
def _wait_for_modifiers_released() -> None:
|
|
194
|
+
"""Wartet bis alle Modifier-Keys (Ctrl, Alt, Shift, Win) losgelassen sind.
|
|
195
|
+
|
|
196
|
+
Noetig fuer Toggle-Hotkey: Der User haelt noch Ctrl+Alt wenn die
|
|
197
|
+
Context-Detection startet. Ctrl+C waehrend Ctrl+Alt gehalten wird,
|
|
198
|
+
wuerde als Ctrl+Alt+C interpretiert und Copy nicht ausloesen.
|
|
199
|
+
|
|
200
|
+
Beim Hold-Hotkey sind die Keys bereits losgelassen → returned sofort.
|
|
201
|
+
"""
|
|
202
|
+
user32 = ctypes.windll.user32
|
|
203
|
+
deadline = time.perf_counter() + _MODIFIER_RELEASE_TIMEOUT_MS / 1000
|
|
204
|
+
|
|
205
|
+
while time.perf_counter() < deadline:
|
|
206
|
+
if not any(user32.GetAsyncKeyState(vk) & 0x8000 for vk in _VK_MODIFIERS):
|
|
207
|
+
return
|
|
208
|
+
time.sleep(0.01) # 10ms polling
|
|
209
|
+
|
|
210
|
+
# Timeout: Modifier immer noch gehalten — trotzdem weitermachen
|
|
211
|
+
log.debug(
|
|
212
|
+
"Modifier keys still held after %dms timeout",
|
|
213
|
+
_MODIFIER_RELEASE_TIMEOUT_MS,
|
|
214
|
+
)
|
|
215
|
+
|
|
216
|
+
|
|
217
|
+
def _restore_clipboard(content: str) -> None:
|
|
218
|
+
"""Stellt den Clipboard-Inhalt wieder her (best-effort)."""
|
|
219
|
+
try:
|
|
220
|
+
pyperclip.copy(content)
|
|
221
|
+
except Exception as e:
|
|
222
|
+
log.warning("Context detection: clipboard restore failed: %s", e)
|
|
223
|
+
|
|
224
|
+
|
|
225
|
+
def detect_selected_text_async(
|
|
226
|
+
window: WindowInfo | None,
|
|
227
|
+
config: ConfigManager,
|
|
228
|
+
cancel_event: threading.Event | None = None,
|
|
229
|
+
) -> Future[str]:
|
|
230
|
+
"""Startet detect_selected_text() in einem daemon-Thread.
|
|
231
|
+
|
|
232
|
+
Args:
|
|
233
|
+
window: Info ueber das aktive Fenster.
|
|
234
|
+
config: ConfigManager-Instanz.
|
|
235
|
+
cancel_event: Optionales Event zum Abbrechen.
|
|
236
|
+
|
|
237
|
+
Returns:
|
|
238
|
+
Future[str] das den markierten Text (oder "") enthaelt.
|
|
239
|
+
"""
|
|
240
|
+
future: Future[str] = Future()
|
|
241
|
+
|
|
242
|
+
def _worker() -> None:
|
|
243
|
+
try:
|
|
244
|
+
result = detect_selected_text(window, config, cancel_event)
|
|
245
|
+
future.set_result(result)
|
|
246
|
+
except Exception as e:
|
|
247
|
+
future.set_exception(e)
|
|
248
|
+
|
|
249
|
+
thread = threading.Thread(
|
|
250
|
+
target=_worker,
|
|
251
|
+
daemon=True,
|
|
252
|
+
name="context-detector",
|
|
253
|
+
)
|
|
254
|
+
thread.start()
|
|
255
|
+
return future
|
|
@@ -5,6 +5,7 @@ Unterstützt Hold-to-Record und Toggle-Modus.
|
|
|
5
5
|
"""
|
|
6
6
|
|
|
7
7
|
import threading
|
|
8
|
+
import time
|
|
8
9
|
from collections.abc import Callable
|
|
9
10
|
from typing import Any
|
|
10
11
|
|
|
@@ -62,6 +63,10 @@ for _grp in _MODIFIER_GROUPS.values():
|
|
|
62
63
|
# Alt-Keys fuer Menu-Bar-Workaround (Windows aktiviert Menueleiste bei bare Alt-Release)
|
|
63
64
|
_ALT_KEYS: set[keyboard.Key] = {keyboard.Key.alt_l, keyboard.Key.alt_r}
|
|
64
65
|
|
|
66
|
+
# Minimale Zeit zwischen zwei Toggle-Ausloesungen (Sekunden).
|
|
67
|
+
# Verhindert Ghost-Toggles durch synthetische Key-Events (z.B. pyautogui Ctrl+C).
|
|
68
|
+
_TOGGLE_DEBOUNCE_S = 0.5
|
|
69
|
+
|
|
65
70
|
|
|
66
71
|
def _resolve_key(key_str: str) -> keyboard.Key | keyboard.KeyCode:
|
|
67
72
|
"""Löst einen Config-String in ein pynput-Key-Objekt auf."""
|
|
@@ -141,6 +146,7 @@ class HotkeyListener:
|
|
|
141
146
|
self._pressed_keys: set[keyboard.Key | keyboard.KeyCode] = set()
|
|
142
147
|
self._hold_active = False
|
|
143
148
|
self._toggle_combo_held = False
|
|
149
|
+
self._toggle_last_fired: float = 0.0
|
|
144
150
|
self._listener: keyboard.Listener | None = None
|
|
145
151
|
self._lock = threading.Lock()
|
|
146
152
|
self._kb_controller: keyboard.Controller | None = None
|
|
@@ -212,12 +218,20 @@ class HotkeyListener:
|
|
|
212
218
|
if self._on_hold_start:
|
|
213
219
|
threading.Thread(target=self._on_hold_start, daemon=True).start()
|
|
214
220
|
|
|
215
|
-
# Toggle-Hotkey prüfen
|
|
216
|
-
if
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
+
# Toggle-Hotkey prüfen (Guard + Debounce gegen synthetische Key-Events)
|
|
222
|
+
if (self._toggle_keys
|
|
223
|
+
and not self._toggle_combo_held
|
|
224
|
+
and self._check_combo(self._toggle_keys, self._toggle_modifier_groups)):
|
|
225
|
+
now = time.monotonic()
|
|
226
|
+
if (now - self._toggle_last_fired) >= _TOGGLE_DEBOUNCE_S:
|
|
227
|
+
self._toggle_combo_held = True
|
|
228
|
+
self._toggle_last_fired = now
|
|
229
|
+
log.debug("Toggle-Hotkey gedrückt")
|
|
230
|
+
if self._on_toggle:
|
|
231
|
+
threading.Thread(target=self._on_toggle, daemon=True).start()
|
|
232
|
+
else:
|
|
233
|
+
self._toggle_combo_held = True
|
|
234
|
+
log.debug("Toggle-Hotkey ignoriert (Debounce)")
|
|
221
235
|
|
|
222
236
|
def _on_release(self, key: keyboard.Key | keyboard.KeyCode) -> None:
|
|
223
237
|
"""Callback für Key-Release-Events."""
|
|
@@ -301,6 +315,7 @@ class HotkeyListener:
|
|
|
301
315
|
# State zurücksetzen
|
|
302
316
|
self._hold_active = False
|
|
303
317
|
self._toggle_combo_held = False
|
|
318
|
+
self._toggle_last_fired = 0.0
|
|
304
319
|
self._pressed_keys.clear()
|
|
305
320
|
|
|
306
321
|
@property
|
{paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/pipeline/transcription.py
RENAMED
|
@@ -7,6 +7,7 @@ from __future__ import annotations
|
|
|
7
7
|
|
|
8
8
|
import threading
|
|
9
9
|
from collections.abc import Callable
|
|
10
|
+
from concurrent.futures import Future
|
|
10
11
|
from datetime import datetime, timezone
|
|
11
12
|
from pathlib import Path
|
|
12
13
|
|
|
@@ -38,6 +39,41 @@ _HALLUCINATION_PATTERNS = [
|
|
|
38
39
|
]
|
|
39
40
|
_HALLUCINATION_MAX_DURATION = 5.0
|
|
40
41
|
|
|
42
|
+
# Maximale Zeichen fuer Kontext im LLM-Prompt
|
|
43
|
+
_MAX_CONTEXT_CHARS = 2000
|
|
44
|
+
|
|
45
|
+
# Generischer Prompt fuer den Fall ohne Window-Mapping aber mit markiertem Kontext
|
|
46
|
+
_GENERIC_CONTEXT_PROMPT = (
|
|
47
|
+
"Du bist ein Transkriptions-Assistent. "
|
|
48
|
+
"Deine Aufgabe: Gib den transkribierten Text wieder und korrigiere dabei "
|
|
49
|
+
"Schreibweisen von Eigennamen und Fachbegriffen anhand des bereitgestellten Kontexts. "
|
|
50
|
+
"Gib NUR den korrigierten transkribierten Text aus. "
|
|
51
|
+
"Beantworte keine Fragen, fuege keine Erklaerungen hinzu."
|
|
52
|
+
)
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def _enrich_prompt_with_context(system_prompt: str, selected_text: str) -> str:
|
|
56
|
+
"""Reichert einen System-Prompt mit markiertem Kontext-Text an.
|
|
57
|
+
|
|
58
|
+
Args:
|
|
59
|
+
system_prompt: Der bestehende System-Prompt.
|
|
60
|
+
selected_text: Markierter Text aus dem aktiven Fenster.
|
|
61
|
+
|
|
62
|
+
Returns:
|
|
63
|
+
Angereicherter System-Prompt.
|
|
64
|
+
"""
|
|
65
|
+
# Auf maximale Laenge kuerzen
|
|
66
|
+
if len(selected_text) > _MAX_CONTEXT_CHARS:
|
|
67
|
+
selected_text = selected_text[:_MAX_CONTEXT_CHARS]
|
|
68
|
+
log.debug("Kontext auf %d Zeichen gekuerzt", _MAX_CONTEXT_CHARS)
|
|
69
|
+
|
|
70
|
+
return (
|
|
71
|
+
f"{system_prompt}\n\n"
|
|
72
|
+
"Der Nutzer hat folgenden Text im aktiven Fenster markiert.\n"
|
|
73
|
+
"Verwende die darin enthaltenen Schreibweisen fuer Eigennamen und Fachbegriffe:\n"
|
|
74
|
+
f"<selected_context>\n{selected_text}\n</selected_context>"
|
|
75
|
+
)
|
|
76
|
+
|
|
41
77
|
|
|
42
78
|
def _is_hallucination(text: str, audio_duration: float) -> bool:
|
|
43
79
|
"""Prueft ob ein STT-Ergebnis eine Whisper-Halluzination ist.
|
|
@@ -227,6 +263,7 @@ class TranscriptionPipeline:
|
|
|
227
263
|
window: WindowInfo | None = None,
|
|
228
264
|
on_status: Callable[[str], None] | None = None,
|
|
229
265
|
audio_duration: float | None = None,
|
|
266
|
+
context_future: Future[str] | None = None,
|
|
230
267
|
) -> None:
|
|
231
268
|
"""Verarbeitet eine Aufnahme: STT -> (LLM) -> Text-Einfuegung.
|
|
232
269
|
|
|
@@ -236,6 +273,7 @@ class TranscriptionPipeline:
|
|
|
236
273
|
on_status: Optionaler Callback fuer Status-Updates (UI-Integration).
|
|
237
274
|
Wird mit STATUS_*-Konstanten aufgerufen.
|
|
238
275
|
audio_duration: Audio-Dauer in Sekunden (fuer Kosten-Tracking).
|
|
276
|
+
context_future: Optionales Future mit markiertem Text aus dem aktiven Fenster.
|
|
239
277
|
"""
|
|
240
278
|
def _notify(status: str, detail: str = "") -> None:
|
|
241
279
|
if on_status:
|
|
@@ -288,6 +326,27 @@ class TranscriptionPipeline:
|
|
|
288
326
|
# LLM-Formatierung (falls Window-Mapping existiert)
|
|
289
327
|
system_prompt, category_key = self._resolve_formatting(window)
|
|
290
328
|
|
|
329
|
+
# Kontext aus Future holen (sollte laengst fertig sein)
|
|
330
|
+
selected_context = ""
|
|
331
|
+
if context_future is not None:
|
|
332
|
+
try:
|
|
333
|
+
selected_context = context_future.result(timeout=0.5)
|
|
334
|
+
if selected_context:
|
|
335
|
+
log.info("Pipeline: Kontext verfuegbar (%d Zeichen)", len(selected_context))
|
|
336
|
+
except Exception as e:
|
|
337
|
+
log.warning("Pipeline: Context-Future fehlgeschlagen: %s", e)
|
|
338
|
+
|
|
339
|
+
# Prompt mit Kontext anreichern
|
|
340
|
+
if selected_context:
|
|
341
|
+
if system_prompt:
|
|
342
|
+
system_prompt = _enrich_prompt_with_context(system_prompt, selected_context)
|
|
343
|
+
elif self._llm:
|
|
344
|
+
# Kein Window-Mapping, aber Kontext vorhanden → generischer LLM-Call
|
|
345
|
+
system_prompt = _enrich_prompt_with_context(
|
|
346
|
+
_GENERIC_CONTEXT_PROMPT, selected_context
|
|
347
|
+
)
|
|
348
|
+
log.info("Pipeline: Generischer Kontext-Prompt (kein Window-Mapping)")
|
|
349
|
+
|
|
291
350
|
llm_used = False
|
|
292
351
|
llm_input_tokens = 0
|
|
293
352
|
llm_output_tokens = 0
|
|
@@ -381,6 +440,7 @@ class TranscriptionPipeline:
|
|
|
381
440
|
window: WindowInfo | None = None,
|
|
382
441
|
on_status: Callable[[str], None] | None = None,
|
|
383
442
|
audio_duration: float | None = None,
|
|
443
|
+
context_future: Future[str] | None = None,
|
|
384
444
|
) -> threading.Thread:
|
|
385
445
|
"""Startet die Pipeline in einem Hintergrund-Thread.
|
|
386
446
|
|
|
@@ -389,13 +449,14 @@ class TranscriptionPipeline:
|
|
|
389
449
|
window: Info ueber das Fenster bei Aufnahme-Start.
|
|
390
450
|
on_status: Optionaler Callback fuer Status-Updates.
|
|
391
451
|
audio_duration: Audio-Dauer in Sekunden (fuer Kosten-Tracking).
|
|
452
|
+
context_future: Optionales Future mit markiertem Text aus dem aktiven Fenster.
|
|
392
453
|
|
|
393
454
|
Returns:
|
|
394
455
|
Der gestartete Thread (fuer Tests / Monitoring).
|
|
395
456
|
"""
|
|
396
457
|
thread = threading.Thread(
|
|
397
458
|
target=self.process,
|
|
398
|
-
args=(wav_path, window, on_status, audio_duration),
|
|
459
|
+
args=(wav_path, window, on_status, audio_duration, context_future),
|
|
399
460
|
daemon=True,
|
|
400
461
|
name="pipeline-worker",
|
|
401
462
|
)
|
|
@@ -5,6 +5,7 @@ Zentrale App-Klasse: Single-Instance, Service-Init, Signal-Bridge, System Tray.
|
|
|
5
5
|
|
|
6
6
|
import os
|
|
7
7
|
import sys
|
|
8
|
+
import threading
|
|
8
9
|
|
|
9
10
|
from PySide6.QtCore import QEvent, QObject, QSharedMemory, Qt, QTimer, Signal
|
|
10
11
|
from PySide6.QtGui import QCursor, QIcon
|
|
@@ -427,6 +428,17 @@ class PayPerTranscriptApp:
|
|
|
427
428
|
window.process_name if window else "(unbekannt)",
|
|
428
429
|
)
|
|
429
430
|
|
|
431
|
+
# Kontext-Erkennung parallel starten (laeuft waehrend STT-API-Call)
|
|
432
|
+
context_future = None
|
|
433
|
+
self._context_cancel = threading.Event()
|
|
434
|
+
if (self._config.get("context.detect_selection", True)
|
|
435
|
+
and self._llm_provider is not None):
|
|
436
|
+
from paypertranscript.core.context_detector import detect_selected_text_async
|
|
437
|
+
|
|
438
|
+
context_future = detect_selected_text_async(
|
|
439
|
+
window, self._config, self._context_cancel
|
|
440
|
+
)
|
|
441
|
+
|
|
430
442
|
# Warnung bei sehr langer Aufnahme
|
|
431
443
|
if actual_duration > MAX_RECORDING_WARN:
|
|
432
444
|
log.warning(
|
|
@@ -439,6 +451,7 @@ class PayPerTranscriptApp:
|
|
|
439
451
|
wav_path, window,
|
|
440
452
|
on_status=self._on_pipeline_status,
|
|
441
453
|
audio_duration=actual_duration,
|
|
454
|
+
context_future=context_future,
|
|
442
455
|
)
|
|
443
456
|
|
|
444
457
|
def _on_toggle(self) -> None:
|
|
@@ -501,6 +514,8 @@ class PayPerTranscriptApp:
|
|
|
501
514
|
def _shutdown(self) -> None:
|
|
502
515
|
"""Fährt alle Services sauber herunter."""
|
|
503
516
|
log.info("PayPerTranscript wird beendet...")
|
|
517
|
+
if hasattr(self, '_context_cancel') and self._context_cancel:
|
|
518
|
+
self._context_cancel.set()
|
|
504
519
|
self._update_timer.stop()
|
|
505
520
|
self._amplitude_timer.stop()
|
|
506
521
|
self._overlay.dismiss()
|
|
File without changes
|
{paypertranscript-0.3.0 → paypertranscript-0.3.1}/PayPerTranscript.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{paypertranscript-0.3.0 → paypertranscript-0.3.1}/PayPerTranscript.egg-info/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/assets/icons/arrow_down.svg
RENAMED
|
File without changes
|
|
File without changes
|
{paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/assets/icons/tray_green.png
RENAMED
|
File without changes
|
{paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/assets/icons/tray_orange.png
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/providers/groq_provider.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/ui/pages/settings_page.py
RENAMED
|
File without changes
|
{paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/ui/pages/statistics_page.py
RENAMED
|
File without changes
|
{paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/ui/pages/window_mapping_page.py
RENAMED
|
File without changes
|
{paypertranscript-0.3.0 → paypertranscript-0.3.1}/paypertranscript/ui/pages/word_list_page.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|