PyPI - ai-screenshooter - Versions diffs - 1.5.0__tar.gz → 1.7.1__tar.gz - Mend

ai-screenshooter 1.5.0__tar.gz → 1.7.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11)

{ai_screenshooter-1.5.0 → ai_screenshooter-1.7.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-screenshooter
-Version: 1.5.0
+Version: 1.7.1
 Summary: A CLI tool to capture and send AI-powered screenshots
 Home-page: https://github.com/tech4vision/ai-screenshoter
 Author: Last Shot AI
@@ -14,6 +14,10 @@ Requires-Dist: requests
 Requires-Dist: Pillow
 Requires-Dist: pygetwindow
 Requires-Dist: pyperclip
+Requires-Dist: sounddevice
+Requires-Dist: soundfile
+Requires-Dist: numpy
+Requires-Dist: faster-whisper
 Dynamic: author
 Dynamic: author-email
 Dynamic: classifier

{ai_screenshooter-1.5.0 → ai_screenshooter-1.7.1}/ai_screenshooter.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-screenshooter
-Version: 1.5.0
+Version: 1.7.1
 Summary: A CLI tool to capture and send AI-powered screenshots
 Home-page: https://github.com/tech4vision/ai-screenshoter
 Author: Last Shot AI
@@ -14,6 +14,10 @@ Requires-Dist: requests
 Requires-Dist: Pillow
 Requires-Dist: pygetwindow
 Requires-Dist: pyperclip
+Requires-Dist: sounddevice
+Requires-Dist: soundfile
+Requires-Dist: numpy
+Requires-Dist: faster-whisper
 Dynamic: author
 Dynamic: author-email
 Dynamic: classifier

{ai_screenshooter-1.5.0 → ai_screenshooter-1.7.1}/ai_screenshooter.egg-info/requires.txt RENAMED Viewed

@@ -3,3 +3,7 @@ requests
 Pillow
 pygetwindow
 pyperclip
+sounddevice
+soundfile
+numpy
+faster-whisper

{ai_screenshooter-1.5.0 → ai_screenshooter-1.7.1}/ai_screenshot.py RENAMED Viewed

@@ -1,4 +1,5 @@
 import argparse
+import json
 import os
 import sys
 import signal
@@ -6,6 +7,7 @@ import logging
 import atexit
 import time
 import subprocess
+import threading
 import requests
 import pygetwindow as gw
 import pyperclip
@@ -17,9 +19,17 @@ from pynput import keyboard
 # Constants
 PID_FILE = Path.home() / ".ai-screenshooter.pid"
 LOG_FILE = Path.home() / ".ai-screenshooter.log"
+META_FILE = Path.home() / ".ai-screenshooter.meta.json"
 SCREENSHOT_DIR = Path.home() / ".ai-screenshooter" / "screenshots"
+AUDIO_DIR = Path.home() / ".ai-screenshooter" / "audio"
 TIMEOUT_SECONDS = 5 * 60 * 60  # 5 hours
+# Audio recording constants
+SAMPLE_RATE = 16000  # Whisper expects 16kHz
+CHANNELS = 1  # Mono audio
+WHISPER_MODEL = "base"  # Options: tiny, base, small, medium, large
+DOUBLE_TAP_THRESHOLD = 0.5  # 500ms window for double-tap
 # Server URLs
 PROD_URL = "https://service.tech4vision.net/ai-management-service/api/v1/sessions/code-challenge"
 LOCAL_URL = "http://localhost:8082/api/v1/sessions/code-challenge"
@@ -31,6 +41,13 @@ API_URL = None
 current_keys = set()
 logger = logging.getLogger("ai-screenshooter")
+# Voice recording state
+is_recording = False
+audio_thread = None
+audio_data = []
+whisper_model = None  # Lazy-loaded on first use
+last_esc_time = 0  # For double-tap detection
 if sys.platform == "win32":
     import ctypes
     from ctypes import Structure, c_long
@@ -82,6 +99,31 @@ def cleanup_pid_file():
             PID_FILE.unlink()
     except Exception:
         pass
+    try:
+        if META_FILE.exists():
+            META_FILE.unlink()
+    except Exception:
+        pass
+def write_meta_file(server_mode, server_url):
+    """Write process metadata for status command."""
+    meta = {
+        "started_at": time.time(),
+        "server_mode": server_mode,
+        "server_url": server_url,
+    }
+    META_FILE.write_text(json.dumps(meta))
+def read_meta_file():
+    """Read process metadata, return None if invalid."""
+    if not META_FILE.exists():
+        return None
+    try:
+        return json.loads(META_FILE.read_text())
+    except (ValueError, IOError):
+        return None
 # ============ Process Management ============
@@ -299,11 +341,196 @@ def send_clipboard_text():
         logger.error(f"Error sending clipboard text: {e}")
+# ============ Voice Recording Functions ============
+def get_whisper_model():
+    """Lazy-load Whisper model on first use."""
+    global whisper_model
+    if whisper_model is None:
+        try:
+            from faster_whisper import WhisperModel
+            logger.info(f"Loading Whisper model '{WHISPER_MODEL}' (first time may download ~74MB)...")
+            whisper_model = WhisperModel(WHISPER_MODEL, device="cpu", compute_type="int8")
+            logger.info("Whisper model loaded successfully.")
+        except Exception as e:
+            logger.error(f"Failed to load Whisper model: {e}")
+            return None
+    return whisper_model
+def record_audio():
+    """Record audio from microphone in a separate thread."""
+    global audio_data, is_recording
+    import sounddevice as sd
+    audio_data = []
+    def audio_callback(indata, frames, time_info, status):
+        if status:
+            logger.warning(f"Audio status: {status}")
+        if is_recording:
+            audio_data.append(indata.copy())
+    try:
+        with sd.InputStream(samplerate=SAMPLE_RATE, channels=CHANNELS,
+                           callback=audio_callback, dtype='float32'):
+            while is_recording:
+                sd.sleep(100)  # Sleep 100ms, check if still recording
+    except Exception as e:
+        logger.error(f"Microphone error: {e}")
+def start_voice_recording():
+    """Start recording audio in a background thread."""
+    global is_recording, audio_thread, audio_data
+    if is_recording:
+        return  # Already recording
+    logger.info("Voice recording started... (release ESC to stop)")
+    is_recording = True
+    audio_data = []
+    audio_thread = threading.Thread(target=record_audio, daemon=True)
+    audio_thread.start()
+def stop_voice_recording_and_send():
+    """Stop recording, transcribe audio, and send to API."""
+    global is_recording, audio_thread, audio_data
+    if not is_recording:
+        return
+    logger.info("Voice recording stopped, processing...")
+    is_recording = False
+    # Wait for recording thread to finish
+    if audio_thread:
+        audio_thread.join(timeout=1.0)
+    # Check if we have audio data
+    if not audio_data:
+        logger.warning("No audio recorded.")
+        return
+    # Combine audio chunks
+    try:
+        import numpy as np
+        import soundfile as sf
+        audio_array = np.concatenate(audio_data, axis=0)
+        # Minimum recording duration check (0.5 seconds)
+        if len(audio_array) < SAMPLE_RATE * 0.5:
+            logger.warning("Recording too short, ignoring.")
+            return
+        # Save to temporary file
+        AUDIO_DIR.mkdir(parents=True, exist_ok=True)
+        temp_audio_path = AUDIO_DIR / f"recording_{int(time.time())}.wav"
+        sf.write(str(temp_audio_path), audio_array, SAMPLE_RATE)
+        logger.info(f"Audio saved: {temp_audio_path}")
+        # Transcribe
+        transcribed_text = transcribe_audio(temp_audio_path)
+        if transcribed_text:
+            # Send to API
+            send_transcribed_text(transcribed_text)
+    except Exception as e:
+        logger.error(f"Error processing audio: {e}")
+    finally:
+        # Cleanup temp file
+        try:
+            if 'temp_audio_path' in locals() and temp_audio_path.exists():
+                temp_audio_path.unlink()
+        except Exception:
+            pass
+def transcribe_audio(audio_path):
+    """Transcribe audio file using Whisper."""
+    try:
+        model = get_whisper_model()
+        if model is None:
+            return None
+        logger.info("Transcribing audio...")
+        segments, info = model.transcribe(str(audio_path), beam_size=5)
+        # Combine all segments
+        text = " ".join([segment.text.strip() for segment in segments])
+        if text:
+            logger.info(f"Transcription: {text[:100]}{'...' if len(text) > 100 else ''}")
+        else:
+            logger.warning("Transcription returned empty text.")
+        return text
+    except Exception as e:
+        logger.error(f"Transcription error: {e}")
+        return None
+def send_transcribed_text(text):
+    """Send transcribed text to the Code tab API."""
+    if not API_TOKEN:
+        logger.error("No API token provided!")
+        return
+    if not text or not text.strip():
+        logger.warning("No text to send.")
+        return
+    try:
+        response = requests.post(
+            f"{API_URL}/chat",
+            headers={
+                "Authorization": f"Bearer {API_TOKEN}",
+                "Content-Type": "application/json"
+            },
+            json={"message": text}
+        )
+        if response.status_code == 200:
+            logger.info("Transcribed text sent successfully.")
+        else:
+            logger.error(f"Failed to send text: {response.text}")
+    except Exception as e:
+        logger.error(f"Error sending transcribed text: {e}")
 # ============ Keyboard Handlers ============
 def on_press(key):
-    current_keys.add(key)
+    global last_esc_time, is_recording
     try:
+        # Double-tap ESC detection for voice recording
+        if key == keyboard.Key.esc:
+            # Ignore repeated key events from holding ESC
+            if keyboard.Key.esc in current_keys:
+                return
+            current_keys.add(key)
+            current_time = time.time()
+            time_since_last = current_time - last_esc_time
+            if time_since_last < DOUBLE_TAP_THRESHOLD and not is_recording:
+                # Double-tap detected - start recording
+                start_voice_recording()
+            last_esc_time = current_time
+        # Track non-ESC keys for combo detection
+        else:
+            current_keys.add(key)
+        # Other hotkeys (ESC + arrow keys)
         if key == keyboard.Key.down and keyboard.Key.esc in current_keys:
             logger.info("Capturing screenshot...")
             capture_screenshot()
@@ -318,11 +545,18 @@ def on_press(key):
 def on_release(key):
+    global is_recording
     try:
         current_keys.remove(key)
     except KeyError:
         pass
+    # Stop voice recording when ESC is released
+    if is_recording and key == keyboard.Key.esc:
+        # Run transcription in background thread to not block keyboard listener
+        threading.Thread(target=stop_voice_recording_and_send, daemon=True).start()
 # ============ CLI Commands ============
@@ -330,19 +564,20 @@ def cmd_start(args):
     """Handle the start command."""
     global API_TOKEN, API_URL
-    # If --background flag, spawn a new process and exit
-    if args.background:
-        print("Starting in background mode...")
+    is_daemon = getattr(args, 'daemon', False)
+    # Kill any existing instance (unless this is the daemon subprocess itself)
+    if not is_daemon:
         killed = kill_existing_process()
         if killed:
-            print("Killed existing instance.")
+            print("Replaced existing instance.")
+    # If --background flag, spawn a new process and exit
+    if args.background:
+        print("Starting in background mode...")
         start_background_process(args.token, args.local)
         return
-    # If --daemon flag (internal), this is the actual daemon process
-    is_daemon = getattr(args, 'daemon', False)
     if is_daemon:
         # Write PID file
         write_pid_file()
@@ -365,11 +600,16 @@ def cmd_start(args):
     API_URL = LOCAL_URL if args.local else PROD_URL
     server_mode = "LOCAL" if args.local else "PRODUCTION"
+    # Write metadata for status command
+    write_meta_file(server_mode, API_URL)
     logger.info("AI Screenshot CLI started.")
     logger.info(f"Server: {server_mode} ({API_URL})")
     logger.info("Press ESC + Down to capture a screenshot.")
     logger.info("Press ESC + Up to send all stored screenshots.")
     logger.info("Press ESC + Right to send clipboard text to Code tab.")
+    logger.info("Double-tap ESC (hold on 2nd) to record voice and send transcription.")
     if not is_daemon:
         logger.info("Running... (Press Ctrl + C to exit)")
@@ -383,11 +623,38 @@ def cmd_status(args):
     pid = get_pid_from_file()
     if pid and is_process_running(pid):
         print(f"ai-screenshooter is running (PID: {pid})")
+        meta = read_meta_file()
+        if meta:
+            # Uptime
+            elapsed = time.time() - meta.get("started_at", time.time())
+            hours, remainder = divmod(int(elapsed), 3600)
+            minutes, seconds = divmod(remainder, 60)
+            print(f"  Uptime:  {hours}h {minutes}m {seconds}s")
+            # Time remaining
+            remaining = TIMEOUT_SECONDS - elapsed
+            if remaining > 0:
+                rh, rr = divmod(int(remaining), 3600)
+                rm, rs = divmod(rr, 60)
+                print(f"  Expires: {rh}h {rm}m {rs}s remaining")
+            # Server
+            print(f"  Server:  {meta.get('server_mode', 'UNKNOWN')} ({meta.get('server_url', '')})")
+        print()
+        print("  Listening for hotkeys:")
+        print("    ESC + Down        Capture screenshot")
+        print("    ESC + Up          Send all screenshots")
+        print("    ESC + Right       Send clipboard text to Code tab")
+        print("    Double-tap ESC    Record voice, transcribe and send")
         return 0
     else:
         print("ai-screenshooter is not running")
         if PID_FILE.exists():
-            print(f"(stale PID file exists at {PID_FILE})")
+            print(f"(stale PID file found, cleaning up)")
+            cleanup_pid_file()
         return 1

{ai_screenshooter-1.5.0 → ai_screenshooter-1.7.1}/setup.py RENAMED Viewed

@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
 setup(
     name="ai-screenshooter",
-    version="1.5.0",
+    version="1.7.1",
     packages=find_packages(),
     py_modules=["ai_screenshot"],
     install_requires=[
@@ -10,7 +10,11 @@ setup(
         "requests",
         "Pillow",
         "pygetwindow",
-        "pyperclip"
+        "pyperclip",
+        "sounddevice",
+        "soundfile",
+        "numpy",
+        "faster-whisper"
     ],
     entry_points={
         "console_scripts": [

{ai_screenshooter-1.5.0 → ai_screenshooter-1.7.1}/README.md RENAMED Viewed

File without changes

{ai_screenshooter-1.5.0 → ai_screenshooter-1.7.1}/ai_screenshooter.egg-info/SOURCES.txt RENAMED Viewed

File without changes

{ai_screenshooter-1.5.0 → ai_screenshooter-1.7.1}/ai_screenshooter.egg-info/dependency_links.txt RENAMED Viewed

File without changes

{ai_screenshooter-1.5.0 → ai_screenshooter-1.7.1}/ai_screenshooter.egg-info/entry_points.txt RENAMED Viewed

File without changes

{ai_screenshooter-1.5.0 → ai_screenshooter-1.7.1}/ai_screenshooter.egg-info/top_level.txt RENAMED Viewed

File without changes

{ai_screenshooter-1.5.0 → ai_screenshooter-1.7.1}/setup.cfg RENAMED Viewed

File without changes

ai-screenshooter 1.5.0__tar.gz → 1.7.1__tar.gz

ai-screenshooter 1.5.0__tar.gz → 1.7.1__tar.gz