ai-screenshooter 1.5.0__tar.gz → 1.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ai_screenshooter-1.5.0 → ai_screenshooter-1.7.0}/PKG-INFO +5 -1
- {ai_screenshooter-1.5.0 → ai_screenshooter-1.7.0}/ai_screenshooter.egg-info/PKG-INFO +5 -1
- {ai_screenshooter-1.5.0 → ai_screenshooter-1.7.0}/ai_screenshooter.egg-info/requires.txt +4 -0
- {ai_screenshooter-1.5.0 → ai_screenshooter-1.7.0}/ai_screenshot.py +202 -1
- {ai_screenshooter-1.5.0 → ai_screenshooter-1.7.0}/setup.py +6 -2
- {ai_screenshooter-1.5.0 → ai_screenshooter-1.7.0}/README.md +0 -0
- {ai_screenshooter-1.5.0 → ai_screenshooter-1.7.0}/ai_screenshooter.egg-info/SOURCES.txt +0 -0
- {ai_screenshooter-1.5.0 → ai_screenshooter-1.7.0}/ai_screenshooter.egg-info/dependency_links.txt +0 -0
- {ai_screenshooter-1.5.0 → ai_screenshooter-1.7.0}/ai_screenshooter.egg-info/entry_points.txt +0 -0
- {ai_screenshooter-1.5.0 → ai_screenshooter-1.7.0}/ai_screenshooter.egg-info/top_level.txt +0 -0
- {ai_screenshooter-1.5.0 → ai_screenshooter-1.7.0}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ai-screenshooter
|
|
3
|
-
Version: 1.5.0
|
|
3
|
+
Version: 1.7.0
|
|
4
4
|
Summary: A CLI tool to capture and send AI-powered screenshots
|
|
5
5
|
Home-page: https://github.com/tech4vision/ai-screenshoter
|
|
6
6
|
Author: Last Shot AI
|
|
@@ -14,6 +14,10 @@ Requires-Dist: requests
|
|
|
14
14
|
Requires-Dist: Pillow
|
|
15
15
|
Requires-Dist: pygetwindow
|
|
16
16
|
Requires-Dist: pyperclip
|
|
17
|
+
Requires-Dist: sounddevice
|
|
18
|
+
Requires-Dist: soundfile
|
|
19
|
+
Requires-Dist: numpy
|
|
20
|
+
Requires-Dist: faster-whisper
|
|
17
21
|
Dynamic: author
|
|
18
22
|
Dynamic: author-email
|
|
19
23
|
Dynamic: classifier
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ai-screenshooter
|
|
3
|
-
Version: 1.5.0
|
|
3
|
+
Version: 1.7.0
|
|
4
4
|
Summary: A CLI tool to capture and send AI-powered screenshots
|
|
5
5
|
Home-page: https://github.com/tech4vision/ai-screenshoter
|
|
6
6
|
Author: Last Shot AI
|
|
@@ -14,6 +14,10 @@ Requires-Dist: requests
|
|
|
14
14
|
Requires-Dist: Pillow
|
|
15
15
|
Requires-Dist: pygetwindow
|
|
16
16
|
Requires-Dist: pyperclip
|
|
17
|
+
Requires-Dist: sounddevice
|
|
18
|
+
Requires-Dist: soundfile
|
|
19
|
+
Requires-Dist: numpy
|
|
20
|
+
Requires-Dist: faster-whisper
|
|
17
21
|
Dynamic: author
|
|
18
22
|
Dynamic: author-email
|
|
19
23
|
Dynamic: classifier
|
|
@@ -6,6 +6,7 @@ import logging
|
|
|
6
6
|
import atexit
|
|
7
7
|
import time
|
|
8
8
|
import subprocess
|
|
9
|
+
import threading
|
|
9
10
|
import requests
|
|
10
11
|
import pygetwindow as gw
|
|
11
12
|
import pyperclip
|
|
@@ -18,8 +19,15 @@ from pynput import keyboard
|
|
|
18
19
|
PID_FILE = Path.home() / ".ai-screenshooter.pid"
|
|
19
20
|
LOG_FILE = Path.home() / ".ai-screenshooter.log"
|
|
20
21
|
SCREENSHOT_DIR = Path.home() / ".ai-screenshooter" / "screenshots"
|
|
22
|
+
AUDIO_DIR = Path.home() / ".ai-screenshooter" / "audio"
|
|
21
23
|
TIMEOUT_SECONDS = 5 * 60 * 60 # 5 hours
|
|
22
24
|
|
|
25
|
+
# Audio recording constants
|
|
26
|
+
SAMPLE_RATE = 16000 # Whisper expects 16kHz
|
|
27
|
+
CHANNELS = 1 # Mono audio
|
|
28
|
+
WHISPER_MODEL = "base" # Options: tiny, base, small, medium, large
|
|
29
|
+
DOUBLE_TAP_THRESHOLD = 0.5 # 500ms window for double-tap
|
|
30
|
+
|
|
23
31
|
# Server URLs
|
|
24
32
|
PROD_URL = "https://service.tech4vision.net/ai-management-service/api/v1/sessions/code-challenge"
|
|
25
33
|
LOCAL_URL = "http://localhost:8082/api/v1/sessions/code-challenge"
|
|
@@ -31,6 +39,13 @@ API_URL = None
|
|
|
31
39
|
current_keys = set()
|
|
32
40
|
logger = logging.getLogger("ai-screenshooter")
|
|
33
41
|
|
|
42
|
+
# Voice recording state
|
|
43
|
+
is_recording = False
|
|
44
|
+
audio_thread = None
|
|
45
|
+
audio_data = []
|
|
46
|
+
whisper_model = None # Lazy-loaded on first use
|
|
47
|
+
last_esc_time = 0 # For double-tap detection
|
|
48
|
+
|
|
34
49
|
if sys.platform == "win32":
|
|
35
50
|
import ctypes
|
|
36
51
|
from ctypes import Structure, c_long
|
|
@@ -299,12 +314,190 @@ def send_clipboard_text():
|
|
|
299
314
|
logger.error(f"Error sending clipboard text: {e}")
|
|
300
315
|
|
|
301
316
|
|
|
317
|
+
# ============ Voice Recording Functions ============
|
|
318
|
+
|
|
319
|
+
def get_whisper_model():
|
|
320
|
+
"""Lazy-load Whisper model on first use."""
|
|
321
|
+
global whisper_model
|
|
322
|
+
if whisper_model is None:
|
|
323
|
+
try:
|
|
324
|
+
from faster_whisper import WhisperModel
|
|
325
|
+
logger.info(f"Loading Whisper model '{WHISPER_MODEL}' (first time may download ~74MB)...")
|
|
326
|
+
whisper_model = WhisperModel(WHISPER_MODEL, device="cpu", compute_type="int8")
|
|
327
|
+
logger.info("Whisper model loaded successfully.")
|
|
328
|
+
except Exception as e:
|
|
329
|
+
logger.error(f"Failed to load Whisper model: {e}")
|
|
330
|
+
return None
|
|
331
|
+
return whisper_model
|
|
332
|
+
|
|
333
|
+
|
|
334
|
+
def record_audio():
|
|
335
|
+
"""Record audio from microphone in a separate thread."""
|
|
336
|
+
global audio_data, is_recording
|
|
337
|
+
import sounddevice as sd
|
|
338
|
+
|
|
339
|
+
audio_data = []
|
|
340
|
+
|
|
341
|
+
def audio_callback(indata, frames, time_info, status):
|
|
342
|
+
if status:
|
|
343
|
+
logger.warning(f"Audio status: {status}")
|
|
344
|
+
if is_recording:
|
|
345
|
+
audio_data.append(indata.copy())
|
|
346
|
+
|
|
347
|
+
try:
|
|
348
|
+
with sd.InputStream(samplerate=SAMPLE_RATE, channels=CHANNELS,
|
|
349
|
+
callback=audio_callback, dtype='float32'):
|
|
350
|
+
while is_recording:
|
|
351
|
+
sd.sleep(100) # Sleep 100ms, check if still recording
|
|
352
|
+
except Exception as e:
|
|
353
|
+
logger.error(f"Microphone error: {e}")
|
|
354
|
+
|
|
355
|
+
|
|
356
|
+
def start_voice_recording():
|
|
357
|
+
"""Start recording audio in a background thread."""
|
|
358
|
+
global is_recording, audio_thread, audio_data
|
|
359
|
+
|
|
360
|
+
if is_recording:
|
|
361
|
+
return # Already recording
|
|
362
|
+
|
|
363
|
+
logger.info("Voice recording started... (release ESC to stop)")
|
|
364
|
+
is_recording = True
|
|
365
|
+
audio_data = []
|
|
366
|
+
|
|
367
|
+
audio_thread = threading.Thread(target=record_audio, daemon=True)
|
|
368
|
+
audio_thread.start()
|
|
369
|
+
|
|
370
|
+
|
|
371
|
+
def stop_voice_recording_and_send():
|
|
372
|
+
"""Stop recording, transcribe audio, and send to API."""
|
|
373
|
+
global is_recording, audio_thread, audio_data
|
|
374
|
+
|
|
375
|
+
if not is_recording:
|
|
376
|
+
return
|
|
377
|
+
|
|
378
|
+
logger.info("Voice recording stopped, processing...")
|
|
379
|
+
is_recording = False
|
|
380
|
+
|
|
381
|
+
# Wait for recording thread to finish
|
|
382
|
+
if audio_thread:
|
|
383
|
+
audio_thread.join(timeout=1.0)
|
|
384
|
+
|
|
385
|
+
# Check if we have audio data
|
|
386
|
+
if not audio_data:
|
|
387
|
+
logger.warning("No audio recorded.")
|
|
388
|
+
return
|
|
389
|
+
|
|
390
|
+
# Combine audio chunks
|
|
391
|
+
try:
|
|
392
|
+
import numpy as np
|
|
393
|
+
import soundfile as sf
|
|
394
|
+
|
|
395
|
+
audio_array = np.concatenate(audio_data, axis=0)
|
|
396
|
+
|
|
397
|
+
# Minimum recording duration check (0.5 seconds)
|
|
398
|
+
if len(audio_array) < SAMPLE_RATE * 0.5:
|
|
399
|
+
logger.warning("Recording too short, ignoring.")
|
|
400
|
+
return
|
|
401
|
+
|
|
402
|
+
# Save to temporary file
|
|
403
|
+
AUDIO_DIR.mkdir(parents=True, exist_ok=True)
|
|
404
|
+
temp_audio_path = AUDIO_DIR / f"recording_{int(time.time())}.wav"
|
|
405
|
+
|
|
406
|
+
sf.write(str(temp_audio_path), audio_array, SAMPLE_RATE)
|
|
407
|
+
logger.info(f"Audio saved: {temp_audio_path}")
|
|
408
|
+
|
|
409
|
+
# Transcribe
|
|
410
|
+
transcribed_text = transcribe_audio(temp_audio_path)
|
|
411
|
+
|
|
412
|
+
if transcribed_text:
|
|
413
|
+
# Send to API
|
|
414
|
+
send_transcribed_text(transcribed_text)
|
|
415
|
+
|
|
416
|
+
except Exception as e:
|
|
417
|
+
logger.error(f"Error processing audio: {e}")
|
|
418
|
+
finally:
|
|
419
|
+
# Cleanup temp file
|
|
420
|
+
try:
|
|
421
|
+
if 'temp_audio_path' in locals() and temp_audio_path.exists():
|
|
422
|
+
temp_audio_path.unlink()
|
|
423
|
+
except Exception:
|
|
424
|
+
pass
|
|
425
|
+
|
|
426
|
+
|
|
427
|
+
def transcribe_audio(audio_path):
|
|
428
|
+
"""Transcribe audio file using Whisper."""
|
|
429
|
+
try:
|
|
430
|
+
model = get_whisper_model()
|
|
431
|
+
if model is None:
|
|
432
|
+
return None
|
|
433
|
+
|
|
434
|
+
logger.info("Transcribing audio...")
|
|
435
|
+
segments, info = model.transcribe(str(audio_path), beam_size=5)
|
|
436
|
+
|
|
437
|
+
# Combine all segments
|
|
438
|
+
text = " ".join([segment.text.strip() for segment in segments])
|
|
439
|
+
|
|
440
|
+
if text:
|
|
441
|
+
logger.info(f"Transcription: {text[:100]}{'...' if len(text) > 100 else ''}")
|
|
442
|
+
else:
|
|
443
|
+
logger.warning("Transcription returned empty text.")
|
|
444
|
+
|
|
445
|
+
return text
|
|
446
|
+
|
|
447
|
+
except Exception as e:
|
|
448
|
+
logger.error(f"Transcription error: {e}")
|
|
449
|
+
return None
|
|
450
|
+
|
|
451
|
+
|
|
452
|
+
def send_transcribed_text(text):
|
|
453
|
+
"""Send transcribed text to the Code tab API."""
|
|
454
|
+
if not API_TOKEN:
|
|
455
|
+
logger.error("No API token provided!")
|
|
456
|
+
return
|
|
457
|
+
|
|
458
|
+
if not text or not text.strip():
|
|
459
|
+
logger.warning("No text to send.")
|
|
460
|
+
return
|
|
461
|
+
|
|
462
|
+
try:
|
|
463
|
+
response = requests.post(
|
|
464
|
+
f"{API_URL}/chat",
|
|
465
|
+
headers={
|
|
466
|
+
"Authorization": f"Bearer {API_TOKEN}",
|
|
467
|
+
"Content-Type": "application/json"
|
|
468
|
+
},
|
|
469
|
+
json={"message": text}
|
|
470
|
+
)
|
|
471
|
+
|
|
472
|
+
if response.status_code == 200:
|
|
473
|
+
logger.info("Transcribed text sent successfully.")
|
|
474
|
+
else:
|
|
475
|
+
logger.error(f"Failed to send text: {response.text}")
|
|
476
|
+
except Exception as e:
|
|
477
|
+
logger.error(f"Error sending transcribed text: {e}")
|
|
478
|
+
|
|
479
|
+
|
|
302
480
|
# ============ Keyboard Handlers ============
|
|
303
481
|
|
|
304
482
|
def on_press(key):
|
|
483
|
+
global last_esc_time, is_recording
|
|
484
|
+
|
|
305
485
|
current_keys.add(key)
|
|
486
|
+
|
|
306
487
|
try:
|
|
307
|
-
if key == keyboard.Key.down and keyboard.Key.esc in current_keys:
|
|
488
|
+
# Double-tap ESC detection for voice recording
|
|
489
|
+
if key == keyboard.Key.esc:
|
|
490
|
+
current_time = time.time()
|
|
491
|
+
time_since_last = current_time - last_esc_time
|
|
492
|
+
|
|
493
|
+
if time_since_last < DOUBLE_TAP_THRESHOLD and not is_recording:
|
|
494
|
+
# Double-tap detected - start recording
|
|
495
|
+
start_voice_recording()
|
|
496
|
+
|
|
497
|
+
last_esc_time = current_time
|
|
498
|
+
|
|
499
|
+
# Other hotkeys (ESC + arrow keys)
|
|
500
|
+
elif key == keyboard.Key.down and keyboard.Key.esc in current_keys:
|
|
308
501
|
logger.info("Capturing screenshot...")
|
|
309
502
|
capture_screenshot()
|
|
310
503
|
elif key == keyboard.Key.up and keyboard.Key.esc in current_keys:
|
|
@@ -318,11 +511,18 @@ def on_press(key):
|
|
|
318
511
|
|
|
319
512
|
|
|
320
513
|
def on_release(key):
|
|
514
|
+
global is_recording
|
|
515
|
+
|
|
321
516
|
try:
|
|
322
517
|
current_keys.remove(key)
|
|
323
518
|
except KeyError:
|
|
324
519
|
pass
|
|
325
520
|
|
|
521
|
+
# Stop voice recording when ESC is released
|
|
522
|
+
if is_recording and key == keyboard.Key.esc:
|
|
523
|
+
# Run transcription in background thread to not block keyboard listener
|
|
524
|
+
threading.Thread(target=stop_voice_recording_and_send, daemon=True).start()
|
|
525
|
+
|
|
326
526
|
|
|
327
527
|
# ============ CLI Commands ============
|
|
328
528
|
|
|
@@ -370,6 +570,7 @@ def cmd_start(args):
|
|
|
370
570
|
logger.info("Press ESC + Down to capture a screenshot.")
|
|
371
571
|
logger.info("Press ESC + Up to send all stored screenshots.")
|
|
372
572
|
logger.info("Press ESC + Right to send clipboard text to Code tab.")
|
|
573
|
+
logger.info("Double-tap ESC (hold on 2nd) to record voice and send transcription.")
|
|
373
574
|
if not is_daemon:
|
|
374
575
|
logger.info("Running... (Press Ctrl + C to exit)")
|
|
375
576
|
|
|
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
|
|
|
2
2
|
|
|
3
3
|
setup(
|
|
4
4
|
name="ai-screenshooter",
|
|
5
|
-
version="1.5.0",
|
|
5
|
+
version="1.7.0",
|
|
6
6
|
packages=find_packages(),
|
|
7
7
|
py_modules=["ai_screenshot"],
|
|
8
8
|
install_requires=[
|
|
@@ -10,7 +10,11 @@ setup(
|
|
|
10
10
|
"requests",
|
|
11
11
|
"Pillow",
|
|
12
12
|
"pygetwindow",
|
|
13
|
-
"pyperclip"
|
|
13
|
+
"pyperclip",
|
|
14
|
+
"sounddevice",
|
|
15
|
+
"soundfile",
|
|
16
|
+
"numpy",
|
|
17
|
+
"faster-whisper"
|
|
14
18
|
],
|
|
15
19
|
entry_points={
|
|
16
20
|
"console_scripts": [
|
|
File without changes
|
|
File without changes
|
{ai_screenshooter-1.5.0 → ai_screenshooter-1.7.0}/ai_screenshooter.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{ai_screenshooter-1.5.0 → ai_screenshooter-1.7.0}/ai_screenshooter.egg-info/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|