ai-screenshooter 1.5.0__tar.gz → 1.7.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ai_screenshooter-1.5.0 → ai_screenshooter-1.7.1}/PKG-INFO +5 -1
- {ai_screenshooter-1.5.0 → ai_screenshooter-1.7.1}/ai_screenshooter.egg-info/PKG-INFO +5 -1
- {ai_screenshooter-1.5.0 → ai_screenshooter-1.7.1}/ai_screenshooter.egg-info/requires.txt +4 -0
- {ai_screenshooter-1.5.0 → ai_screenshooter-1.7.1}/ai_screenshot.py +276 -9
- {ai_screenshooter-1.5.0 → ai_screenshooter-1.7.1}/setup.py +6 -2
- {ai_screenshooter-1.5.0 → ai_screenshooter-1.7.1}/README.md +0 -0
- {ai_screenshooter-1.5.0 → ai_screenshooter-1.7.1}/ai_screenshooter.egg-info/SOURCES.txt +0 -0
- {ai_screenshooter-1.5.0 → ai_screenshooter-1.7.1}/ai_screenshooter.egg-info/dependency_links.txt +0 -0
- {ai_screenshooter-1.5.0 → ai_screenshooter-1.7.1}/ai_screenshooter.egg-info/entry_points.txt +0 -0
- {ai_screenshooter-1.5.0 → ai_screenshooter-1.7.1}/ai_screenshooter.egg-info/top_level.txt +0 -0
- {ai_screenshooter-1.5.0 → ai_screenshooter-1.7.1}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ai-screenshooter
|
|
3
|
-
Version: 1.5.0
|
|
3
|
+
Version: 1.7.1
|
|
4
4
|
Summary: A CLI tool to capture and send AI-powered screenshots
|
|
5
5
|
Home-page: https://github.com/tech4vision/ai-screenshoter
|
|
6
6
|
Author: Last Shot AI
|
|
@@ -14,6 +14,10 @@ Requires-Dist: requests
|
|
|
14
14
|
Requires-Dist: Pillow
|
|
15
15
|
Requires-Dist: pygetwindow
|
|
16
16
|
Requires-Dist: pyperclip
|
|
17
|
+
Requires-Dist: sounddevice
|
|
18
|
+
Requires-Dist: soundfile
|
|
19
|
+
Requires-Dist: numpy
|
|
20
|
+
Requires-Dist: faster-whisper
|
|
17
21
|
Dynamic: author
|
|
18
22
|
Dynamic: author-email
|
|
19
23
|
Dynamic: classifier
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ai-screenshooter
|
|
3
|
-
Version: 1.5.0
|
|
3
|
+
Version: 1.7.1
|
|
4
4
|
Summary: A CLI tool to capture and send AI-powered screenshots
|
|
5
5
|
Home-page: https://github.com/tech4vision/ai-screenshoter
|
|
6
6
|
Author: Last Shot AI
|
|
@@ -14,6 +14,10 @@ Requires-Dist: requests
|
|
|
14
14
|
Requires-Dist: Pillow
|
|
15
15
|
Requires-Dist: pygetwindow
|
|
16
16
|
Requires-Dist: pyperclip
|
|
17
|
+
Requires-Dist: sounddevice
|
|
18
|
+
Requires-Dist: soundfile
|
|
19
|
+
Requires-Dist: numpy
|
|
20
|
+
Requires-Dist: faster-whisper
|
|
17
21
|
Dynamic: author
|
|
18
22
|
Dynamic: author-email
|
|
19
23
|
Dynamic: classifier
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import argparse
|
|
2
|
+
import json
|
|
2
3
|
import os
|
|
3
4
|
import sys
|
|
4
5
|
import signal
|
|
@@ -6,6 +7,7 @@ import logging
|
|
|
6
7
|
import atexit
|
|
7
8
|
import time
|
|
8
9
|
import subprocess
|
|
10
|
+
import threading
|
|
9
11
|
import requests
|
|
10
12
|
import pygetwindow as gw
|
|
11
13
|
import pyperclip
|
|
@@ -17,9 +19,17 @@ from pynput import keyboard
|
|
|
17
19
|
# Constants
|
|
18
20
|
PID_FILE = Path.home() / ".ai-screenshooter.pid"
|
|
19
21
|
LOG_FILE = Path.home() / ".ai-screenshooter.log"
|
|
22
|
+
META_FILE = Path.home() / ".ai-screenshooter.meta.json"
|
|
20
23
|
SCREENSHOT_DIR = Path.home() / ".ai-screenshooter" / "screenshots"
|
|
24
|
+
AUDIO_DIR = Path.home() / ".ai-screenshooter" / "audio"
|
|
21
25
|
TIMEOUT_SECONDS = 5 * 60 * 60 # 5 hours
|
|
22
26
|
|
|
27
|
+
# Audio recording constants
|
|
28
|
+
SAMPLE_RATE = 16000 # Whisper expects 16kHz
|
|
29
|
+
CHANNELS = 1 # Mono audio
|
|
30
|
+
WHISPER_MODEL = "base" # Options: tiny, base, small, medium, large
|
|
31
|
+
DOUBLE_TAP_THRESHOLD = 0.5 # 500ms window for double-tap
|
|
32
|
+
|
|
23
33
|
# Server URLs
|
|
24
34
|
PROD_URL = "https://service.tech4vision.net/ai-management-service/api/v1/sessions/code-challenge"
|
|
25
35
|
LOCAL_URL = "http://localhost:8082/api/v1/sessions/code-challenge"
|
|
@@ -31,6 +41,13 @@ API_URL = None
|
|
|
31
41
|
current_keys = set()
|
|
32
42
|
logger = logging.getLogger("ai-screenshooter")
|
|
33
43
|
|
|
44
|
+
# Voice recording state
|
|
45
|
+
is_recording = False
|
|
46
|
+
audio_thread = None
|
|
47
|
+
audio_data = []
|
|
48
|
+
whisper_model = None # Lazy-loaded on first use
|
|
49
|
+
last_esc_time = 0 # For double-tap detection
|
|
50
|
+
|
|
34
51
|
if sys.platform == "win32":
|
|
35
52
|
import ctypes
|
|
36
53
|
from ctypes import Structure, c_long
|
|
@@ -82,6 +99,31 @@ def cleanup_pid_file():
|
|
|
82
99
|
PID_FILE.unlink()
|
|
83
100
|
except Exception:
|
|
84
101
|
pass
|
|
102
|
+
try:
|
|
103
|
+
if META_FILE.exists():
|
|
104
|
+
META_FILE.unlink()
|
|
105
|
+
except Exception:
|
|
106
|
+
pass
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def write_meta_file(server_mode, server_url):
|
|
110
|
+
"""Write process metadata for status command."""
|
|
111
|
+
meta = {
|
|
112
|
+
"started_at": time.time(),
|
|
113
|
+
"server_mode": server_mode,
|
|
114
|
+
"server_url": server_url,
|
|
115
|
+
}
|
|
116
|
+
META_FILE.write_text(json.dumps(meta))
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
def read_meta_file():
|
|
120
|
+
"""Read process metadata, return None if invalid."""
|
|
121
|
+
if not META_FILE.exists():
|
|
122
|
+
return None
|
|
123
|
+
try:
|
|
124
|
+
return json.loads(META_FILE.read_text())
|
|
125
|
+
except (ValueError, IOError):
|
|
126
|
+
return None
|
|
85
127
|
|
|
86
128
|
|
|
87
129
|
# ============ Process Management ============
|
|
@@ -299,11 +341,196 @@ def send_clipboard_text():
|
|
|
299
341
|
logger.error(f"Error sending clipboard text: {e}")
|
|
300
342
|
|
|
301
343
|
|
|
344
|
+
# ============ Voice Recording Functions ============
|
|
345
|
+
|
|
346
|
+
def get_whisper_model():
|
|
347
|
+
"""Lazy-load Whisper model on first use."""
|
|
348
|
+
global whisper_model
|
|
349
|
+
if whisper_model is None:
|
|
350
|
+
try:
|
|
351
|
+
from faster_whisper import WhisperModel
|
|
352
|
+
logger.info(f"Loading Whisper model '{WHISPER_MODEL}' (first time may download ~74MB)...")
|
|
353
|
+
whisper_model = WhisperModel(WHISPER_MODEL, device="cpu", compute_type="int8")
|
|
354
|
+
logger.info("Whisper model loaded successfully.")
|
|
355
|
+
except Exception as e:
|
|
356
|
+
logger.error(f"Failed to load Whisper model: {e}")
|
|
357
|
+
return None
|
|
358
|
+
return whisper_model
|
|
359
|
+
|
|
360
|
+
|
|
361
|
+
def record_audio():
|
|
362
|
+
"""Record audio from microphone in a separate thread."""
|
|
363
|
+
global audio_data, is_recording
|
|
364
|
+
import sounddevice as sd
|
|
365
|
+
|
|
366
|
+
audio_data = []
|
|
367
|
+
|
|
368
|
+
def audio_callback(indata, frames, time_info, status):
|
|
369
|
+
if status:
|
|
370
|
+
logger.warning(f"Audio status: {status}")
|
|
371
|
+
if is_recording:
|
|
372
|
+
audio_data.append(indata.copy())
|
|
373
|
+
|
|
374
|
+
try:
|
|
375
|
+
with sd.InputStream(samplerate=SAMPLE_RATE, channels=CHANNELS,
|
|
376
|
+
callback=audio_callback, dtype='float32'):
|
|
377
|
+
while is_recording:
|
|
378
|
+
sd.sleep(100) # Sleep 100ms, check if still recording
|
|
379
|
+
except Exception as e:
|
|
380
|
+
logger.error(f"Microphone error: {e}")
|
|
381
|
+
|
|
382
|
+
|
|
383
|
+
def start_voice_recording():
|
|
384
|
+
"""Start recording audio in a background thread."""
|
|
385
|
+
global is_recording, audio_thread, audio_data
|
|
386
|
+
|
|
387
|
+
if is_recording:
|
|
388
|
+
return # Already recording
|
|
389
|
+
|
|
390
|
+
logger.info("Voice recording started... (release ESC to stop)")
|
|
391
|
+
is_recording = True
|
|
392
|
+
audio_data = []
|
|
393
|
+
|
|
394
|
+
audio_thread = threading.Thread(target=record_audio, daemon=True)
|
|
395
|
+
audio_thread.start()
|
|
396
|
+
|
|
397
|
+
|
|
398
|
+
def stop_voice_recording_and_send():
|
|
399
|
+
"""Stop recording, transcribe audio, and send to API."""
|
|
400
|
+
global is_recording, audio_thread, audio_data
|
|
401
|
+
|
|
402
|
+
if not is_recording:
|
|
403
|
+
return
|
|
404
|
+
|
|
405
|
+
logger.info("Voice recording stopped, processing...")
|
|
406
|
+
is_recording = False
|
|
407
|
+
|
|
408
|
+
# Wait for recording thread to finish
|
|
409
|
+
if audio_thread:
|
|
410
|
+
audio_thread.join(timeout=1.0)
|
|
411
|
+
|
|
412
|
+
# Check if we have audio data
|
|
413
|
+
if not audio_data:
|
|
414
|
+
logger.warning("No audio recorded.")
|
|
415
|
+
return
|
|
416
|
+
|
|
417
|
+
# Combine audio chunks
|
|
418
|
+
try:
|
|
419
|
+
import numpy as np
|
|
420
|
+
import soundfile as sf
|
|
421
|
+
|
|
422
|
+
audio_array = np.concatenate(audio_data, axis=0)
|
|
423
|
+
|
|
424
|
+
# Minimum recording duration check (0.5 seconds)
|
|
425
|
+
if len(audio_array) < SAMPLE_RATE * 0.5:
|
|
426
|
+
logger.warning("Recording too short, ignoring.")
|
|
427
|
+
return
|
|
428
|
+
|
|
429
|
+
# Save to temporary file
|
|
430
|
+
AUDIO_DIR.mkdir(parents=True, exist_ok=True)
|
|
431
|
+
temp_audio_path = AUDIO_DIR / f"recording_{int(time.time())}.wav"
|
|
432
|
+
|
|
433
|
+
sf.write(str(temp_audio_path), audio_array, SAMPLE_RATE)
|
|
434
|
+
logger.info(f"Audio saved: {temp_audio_path}")
|
|
435
|
+
|
|
436
|
+
# Transcribe
|
|
437
|
+
transcribed_text = transcribe_audio(temp_audio_path)
|
|
438
|
+
|
|
439
|
+
if transcribed_text:
|
|
440
|
+
# Send to API
|
|
441
|
+
send_transcribed_text(transcribed_text)
|
|
442
|
+
|
|
443
|
+
except Exception as e:
|
|
444
|
+
logger.error(f"Error processing audio: {e}")
|
|
445
|
+
finally:
|
|
446
|
+
# Cleanup temp file
|
|
447
|
+
try:
|
|
448
|
+
if 'temp_audio_path' in locals() and temp_audio_path.exists():
|
|
449
|
+
temp_audio_path.unlink()
|
|
450
|
+
except Exception:
|
|
451
|
+
pass
|
|
452
|
+
|
|
453
|
+
|
|
454
|
+
def transcribe_audio(audio_path):
|
|
455
|
+
"""Transcribe audio file using Whisper."""
|
|
456
|
+
try:
|
|
457
|
+
model = get_whisper_model()
|
|
458
|
+
if model is None:
|
|
459
|
+
return None
|
|
460
|
+
|
|
461
|
+
logger.info("Transcribing audio...")
|
|
462
|
+
segments, info = model.transcribe(str(audio_path), beam_size=5)
|
|
463
|
+
|
|
464
|
+
# Combine all segments
|
|
465
|
+
text = " ".join([segment.text.strip() for segment in segments])
|
|
466
|
+
|
|
467
|
+
if text:
|
|
468
|
+
logger.info(f"Transcription: {text[:100]}{'...' if len(text) > 100 else ''}")
|
|
469
|
+
else:
|
|
470
|
+
logger.warning("Transcription returned empty text.")
|
|
471
|
+
|
|
472
|
+
return text
|
|
473
|
+
|
|
474
|
+
except Exception as e:
|
|
475
|
+
logger.error(f"Transcription error: {e}")
|
|
476
|
+
return None
|
|
477
|
+
|
|
478
|
+
|
|
479
|
+
def send_transcribed_text(text):
|
|
480
|
+
"""Send transcribed text to the Code tab API."""
|
|
481
|
+
if not API_TOKEN:
|
|
482
|
+
logger.error("No API token provided!")
|
|
483
|
+
return
|
|
484
|
+
|
|
485
|
+
if not text or not text.strip():
|
|
486
|
+
logger.warning("No text to send.")
|
|
487
|
+
return
|
|
488
|
+
|
|
489
|
+
try:
|
|
490
|
+
response = requests.post(
|
|
491
|
+
f"{API_URL}/chat",
|
|
492
|
+
headers={
|
|
493
|
+
"Authorization": f"Bearer {API_TOKEN}",
|
|
494
|
+
"Content-Type": "application/json"
|
|
495
|
+
},
|
|
496
|
+
json={"message": text}
|
|
497
|
+
)
|
|
498
|
+
|
|
499
|
+
if response.status_code == 200:
|
|
500
|
+
logger.info("Transcribed text sent successfully.")
|
|
501
|
+
else:
|
|
502
|
+
logger.error(f"Failed to send text: {response.text}")
|
|
503
|
+
except Exception as e:
|
|
504
|
+
logger.error(f"Error sending transcribed text: {e}")
|
|
505
|
+
|
|
506
|
+
|
|
302
507
|
# ============ Keyboard Handlers ============
|
|
303
508
|
|
|
304
509
|
def on_press(key):
|
|
305
|
-
|
|
510
|
+
global last_esc_time, is_recording
|
|
511
|
+
|
|
306
512
|
try:
|
|
513
|
+
# Double-tap ESC detection for voice recording
|
|
514
|
+
if key == keyboard.Key.esc:
|
|
515
|
+
# Ignore repeated key events from holding ESC
|
|
516
|
+
if keyboard.Key.esc in current_keys:
|
|
517
|
+
return
|
|
518
|
+
current_keys.add(key)
|
|
519
|
+
|
|
520
|
+
current_time = time.time()
|
|
521
|
+
time_since_last = current_time - last_esc_time
|
|
522
|
+
|
|
523
|
+
if time_since_last < DOUBLE_TAP_THRESHOLD and not is_recording:
|
|
524
|
+
# Double-tap detected - start recording
|
|
525
|
+
start_voice_recording()
|
|
526
|
+
|
|
527
|
+
last_esc_time = current_time
|
|
528
|
+
|
|
529
|
+
# Track non-ESC keys for combo detection
|
|
530
|
+
else:
|
|
531
|
+
current_keys.add(key)
|
|
532
|
+
|
|
533
|
+
# Other hotkeys (ESC + arrow keys)
|
|
307
534
|
if key == keyboard.Key.down and keyboard.Key.esc in current_keys:
|
|
308
535
|
logger.info("Capturing screenshot...")
|
|
309
536
|
capture_screenshot()
|
|
@@ -318,11 +545,18 @@ def on_press(key):
|
|
|
318
545
|
|
|
319
546
|
|
|
320
547
|
def on_release(key):
|
|
548
|
+
global is_recording
|
|
549
|
+
|
|
321
550
|
try:
|
|
322
551
|
current_keys.remove(key)
|
|
323
552
|
except KeyError:
|
|
324
553
|
pass
|
|
325
554
|
|
|
555
|
+
# Stop voice recording when ESC is released
|
|
556
|
+
if is_recording and key == keyboard.Key.esc:
|
|
557
|
+
# Run transcription in background thread to not block keyboard listener
|
|
558
|
+
threading.Thread(target=stop_voice_recording_and_send, daemon=True).start()
|
|
559
|
+
|
|
326
560
|
|
|
327
561
|
# ============ CLI Commands ============
|
|
328
562
|
|
|
@@ -330,19 +564,20 @@ def cmd_start(args):
|
|
|
330
564
|
"""Handle the start command."""
|
|
331
565
|
global API_TOKEN, API_URL
|
|
332
566
|
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
567
|
+
is_daemon = getattr(args, 'daemon', False)
|
|
568
|
+
|
|
569
|
+
# Kill any existing instance (unless this is the daemon subprocess itself)
|
|
570
|
+
if not is_daemon:
|
|
336
571
|
killed = kill_existing_process()
|
|
337
572
|
if killed:
|
|
338
|
-
print("
|
|
573
|
+
print("Replaced existing instance.")
|
|
339
574
|
|
|
575
|
+
# If --background flag, spawn a new process and exit
|
|
576
|
+
if args.background:
|
|
577
|
+
print("Starting in background mode...")
|
|
340
578
|
start_background_process(args.token, args.local)
|
|
341
579
|
return
|
|
342
580
|
|
|
343
|
-
# If --daemon flag (internal), this is the actual daemon process
|
|
344
|
-
is_daemon = getattr(args, 'daemon', False)
|
|
345
|
-
|
|
346
581
|
if is_daemon:
|
|
347
582
|
# Write PID file
|
|
348
583
|
write_pid_file()
|
|
@@ -365,11 +600,16 @@ def cmd_start(args):
|
|
|
365
600
|
API_URL = LOCAL_URL if args.local else PROD_URL
|
|
366
601
|
|
|
367
602
|
server_mode = "LOCAL" if args.local else "PRODUCTION"
|
|
603
|
+
|
|
604
|
+
# Write metadata for status command
|
|
605
|
+
write_meta_file(server_mode, API_URL)
|
|
606
|
+
|
|
368
607
|
logger.info("AI Screenshot CLI started.")
|
|
369
608
|
logger.info(f"Server: {server_mode} ({API_URL})")
|
|
370
609
|
logger.info("Press ESC + Down to capture a screenshot.")
|
|
371
610
|
logger.info("Press ESC + Up to send all stored screenshots.")
|
|
372
611
|
logger.info("Press ESC + Right to send clipboard text to Code tab.")
|
|
612
|
+
logger.info("Double-tap ESC (hold on 2nd) to record voice and send transcription.")
|
|
373
613
|
if not is_daemon:
|
|
374
614
|
logger.info("Running... (Press Ctrl + C to exit)")
|
|
375
615
|
|
|
@@ -383,11 +623,38 @@ def cmd_status(args):
|
|
|
383
623
|
pid = get_pid_from_file()
|
|
384
624
|
if pid and is_process_running(pid):
|
|
385
625
|
print(f"ai-screenshooter is running (PID: {pid})")
|
|
626
|
+
|
|
627
|
+
meta = read_meta_file()
|
|
628
|
+
if meta:
|
|
629
|
+
# Uptime
|
|
630
|
+
elapsed = time.time() - meta.get("started_at", time.time())
|
|
631
|
+
hours, remainder = divmod(int(elapsed), 3600)
|
|
632
|
+
minutes, seconds = divmod(remainder, 60)
|
|
633
|
+
print(f" Uptime: {hours}h {minutes}m {seconds}s")
|
|
634
|
+
|
|
635
|
+
# Time remaining
|
|
636
|
+
remaining = TIMEOUT_SECONDS - elapsed
|
|
637
|
+
if remaining > 0:
|
|
638
|
+
rh, rr = divmod(int(remaining), 3600)
|
|
639
|
+
rm, rs = divmod(rr, 60)
|
|
640
|
+
print(f" Expires: {rh}h {rm}m {rs}s remaining")
|
|
641
|
+
|
|
642
|
+
# Server
|
|
643
|
+
print(f" Server: {meta.get('server_mode', 'UNKNOWN')} ({meta.get('server_url', '')})")
|
|
644
|
+
|
|
645
|
+
print()
|
|
646
|
+
print(" Listening for hotkeys:")
|
|
647
|
+
print(" ESC + Down Capture screenshot")
|
|
648
|
+
print(" ESC + Up Send all screenshots")
|
|
649
|
+
print(" ESC + Right Send clipboard text to Code tab")
|
|
650
|
+
print(" Double-tap ESC Record voice, transcribe and send")
|
|
651
|
+
|
|
386
652
|
return 0
|
|
387
653
|
else:
|
|
388
654
|
print("ai-screenshooter is not running")
|
|
389
655
|
if PID_FILE.exists():
|
|
390
|
-
print(f"(stale PID file
|
|
656
|
+
print(f"(stale PID file found, cleaning up)")
|
|
657
|
+
cleanup_pid_file()
|
|
391
658
|
return 1
|
|
392
659
|
|
|
393
660
|
|
|
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
|
|
|
2
2
|
|
|
3
3
|
setup(
|
|
4
4
|
name="ai-screenshooter",
|
|
5
|
-
version="1.5.0",
|
|
5
|
+
version="1.7.1",
|
|
6
6
|
packages=find_packages(),
|
|
7
7
|
py_modules=["ai_screenshot"],
|
|
8
8
|
install_requires=[
|
|
@@ -10,7 +10,11 @@ setup(
|
|
|
10
10
|
"requests",
|
|
11
11
|
"Pillow",
|
|
12
12
|
"pygetwindow",
|
|
13
|
-
"pyperclip"
|
|
13
|
+
"pyperclip",
|
|
14
|
+
"sounddevice",
|
|
15
|
+
"soundfile",
|
|
16
|
+
"numpy",
|
|
17
|
+
"faster-whisper"
|
|
14
18
|
],
|
|
15
19
|
entry_points={
|
|
16
20
|
"console_scripts": [
|
|
File without changes
|
|
File without changes
|
{ai_screenshooter-1.5.0 → ai_screenshooter-1.7.1}/ai_screenshooter.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
{ai_screenshooter-1.5.0 → ai_screenshooter-1.7.1}/ai_screenshooter.egg-info/entry_points.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|