ai-screenshooter 1.3.0__tar.gz → 1.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ai_screenshooter-1.3.0 → ai_screenshooter-1.7.0}/PKG-INFO +6 -1
- {ai_screenshooter-1.3.0 → ai_screenshooter-1.7.0}/ai_screenshooter.egg-info/PKG-INFO +6 -1
- ai_screenshooter-1.7.0/ai_screenshooter.egg-info/requires.txt +9 -0
- {ai_screenshooter-1.3.0 → ai_screenshooter-1.7.0}/ai_screenshot.py +236 -1
- {ai_screenshooter-1.3.0 → ai_screenshooter-1.7.0}/setup.py +7 -2
- ai_screenshooter-1.3.0/ai_screenshooter.egg-info/requires.txt +0 -4
- {ai_screenshooter-1.3.0 → ai_screenshooter-1.7.0}/README.md +0 -0
- {ai_screenshooter-1.3.0 → ai_screenshooter-1.7.0}/ai_screenshooter.egg-info/SOURCES.txt +0 -0
- {ai_screenshooter-1.3.0 → ai_screenshooter-1.7.0}/ai_screenshooter.egg-info/dependency_links.txt +0 -0
- {ai_screenshooter-1.3.0 → ai_screenshooter-1.7.0}/ai_screenshooter.egg-info/entry_points.txt +0 -0
- {ai_screenshooter-1.3.0 → ai_screenshooter-1.7.0}/ai_screenshooter.egg-info/top_level.txt +0 -0
- {ai_screenshooter-1.3.0 → ai_screenshooter-1.7.0}/setup.cfg +0 -0
{ai_screenshooter-1.3.0 → ai_screenshooter-1.7.0}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-screenshooter
-Version: 1.3.0
+Version: 1.7.0
 Summary: A CLI tool to capture and send AI-powered screenshots
 Home-page: https://github.com/tech4vision/ai-screenshoter
 Author: Last Shot AI
@@ -13,6 +13,11 @@ Requires-Dist: pynput
 Requires-Dist: requests
 Requires-Dist: Pillow
 Requires-Dist: pygetwindow
+Requires-Dist: pyperclip
+Requires-Dist: sounddevice
+Requires-Dist: soundfile
+Requires-Dist: numpy
+Requires-Dist: faster-whisper
 Dynamic: author
 Dynamic: author-email
 Dynamic: classifier
{ai_screenshooter-1.3.0 → ai_screenshooter-1.7.0}/ai_screenshooter.egg-info/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ai-screenshooter
-Version: 1.3.0
+Version: 1.7.0
 Summary: A CLI tool to capture and send AI-powered screenshots
 Home-page: https://github.com/tech4vision/ai-screenshoter
 Author: Last Shot AI
@@ -13,6 +13,11 @@ Requires-Dist: pynput
 Requires-Dist: requests
 Requires-Dist: Pillow
 Requires-Dist: pygetwindow
+Requires-Dist: pyperclip
+Requires-Dist: sounddevice
+Requires-Dist: soundfile
+Requires-Dist: numpy
+Requires-Dist: faster-whisper
 Dynamic: author
 Dynamic: author-email
 Dynamic: classifier
{ai_screenshooter-1.3.0 → ai_screenshooter-1.7.0}/ai_screenshot.py

@@ -6,8 +6,10 @@ import logging
 import atexit
 import time
 import subprocess
+import threading
 import requests
 import pygetwindow as gw
+import pyperclip
 from pathlib import Path
 from PIL import ImageGrab
 from pynput import keyboard
@@ -17,8 +19,15 @@ from pynput import keyboard
 PID_FILE = Path.home() / ".ai-screenshooter.pid"
 LOG_FILE = Path.home() / ".ai-screenshooter.log"
 SCREENSHOT_DIR = Path.home() / ".ai-screenshooter" / "screenshots"
+AUDIO_DIR = Path.home() / ".ai-screenshooter" / "audio"
 TIMEOUT_SECONDS = 5 * 60 * 60  # 5 hours
 
+# Audio recording constants
+SAMPLE_RATE = 16000  # Whisper expects 16kHz
+CHANNELS = 1  # Mono audio
+WHISPER_MODEL = "base"  # Options: tiny, base, small, medium, large
+DOUBLE_TAP_THRESHOLD = 0.5  # 500ms window for double-tap
+
 # Server URLs
 PROD_URL = "https://service.tech4vision.net/ai-management-service/api/v1/sessions/code-challenge"
 LOCAL_URL = "http://localhost:8082/api/v1/sessions/code-challenge"
@@ -30,6 +39,13 @@ API_URL = None
 current_keys = set()
 logger = logging.getLogger("ai-screenshooter")
 
+# Voice recording state
+is_recording = False
+audio_thread = None
+audio_data = []
+whisper_model = None  # Lazy-loaded on first use
+last_esc_time = 0  # For double-tap detection
+
 if sys.platform == "win32":
     import ctypes
     from ctypes import Structure, c_long
@@ -269,27 +285,244 @@ def send_screenshots():
         logger.error(f"Error uploading screenshots: {e}")
 
 
+def send_clipboard_text():
+    """Send clipboard content to Code tab API."""
+    if not API_TOKEN:
+        logger.error("No API token provided!")
+        return
+
+    try:
+        text = pyperclip.paste()
+        if not text or not text.strip():
+            logger.warning("Clipboard is empty.")
+            return
+
+        response = requests.post(
+            f"{API_URL}/chat",
+            headers={
+                "Authorization": f"Bearer {API_TOKEN}",
+                "Content-Type": "application/json"
+            },
+            json={"message": text}
+        )
+
+        if response.status_code == 200:
+            logger.info("Text sent to Code tab successfully.")
+        else:
+            logger.error(f"Failed to send text: {response.text}")
+    except Exception as e:
+        logger.error(f"Error sending clipboard text: {e}")
+
+
+# ============ Voice Recording Functions ============
+
+def get_whisper_model():
+    """Lazy-load Whisper model on first use."""
+    global whisper_model
+    if whisper_model is None:
+        try:
+            from faster_whisper import WhisperModel
+            logger.info(f"Loading Whisper model '{WHISPER_MODEL}' (first time may download ~74MB)...")
+            whisper_model = WhisperModel(WHISPER_MODEL, device="cpu", compute_type="int8")
+            logger.info("Whisper model loaded successfully.")
+        except Exception as e:
+            logger.error(f"Failed to load Whisper model: {e}")
+            return None
+    return whisper_model
+
+
+def record_audio():
+    """Record audio from microphone in a separate thread."""
+    global audio_data, is_recording
+    import sounddevice as sd
+
+    audio_data = []
+
+    def audio_callback(indata, frames, time_info, status):
+        if status:
+            logger.warning(f"Audio status: {status}")
+        if is_recording:
+            audio_data.append(indata.copy())
+
+    try:
+        with sd.InputStream(samplerate=SAMPLE_RATE, channels=CHANNELS,
+                            callback=audio_callback, dtype='float32'):
+            while is_recording:
+                sd.sleep(100)  # Sleep 100ms, check if still recording
+    except Exception as e:
+        logger.error(f"Microphone error: {e}")
+
+
+def start_voice_recording():
+    """Start recording audio in a background thread."""
+    global is_recording, audio_thread, audio_data
+
+    if is_recording:
+        return  # Already recording
+
+    logger.info("Voice recording started... (release ESC to stop)")
+    is_recording = True
+    audio_data = []
+
+    audio_thread = threading.Thread(target=record_audio, daemon=True)
+    audio_thread.start()
+
+
+def stop_voice_recording_and_send():
+    """Stop recording, transcribe audio, and send to API."""
+    global is_recording, audio_thread, audio_data
+
+    if not is_recording:
+        return
+
+    logger.info("Voice recording stopped, processing...")
+    is_recording = False
+
+    # Wait for recording thread to finish
+    if audio_thread:
+        audio_thread.join(timeout=1.0)
+
+    # Check if we have audio data
+    if not audio_data:
+        logger.warning("No audio recorded.")
+        return
+
+    # Combine audio chunks
+    try:
+        import numpy as np
+        import soundfile as sf
+
+        audio_array = np.concatenate(audio_data, axis=0)
+
+        # Minimum recording duration check (0.5 seconds)
+        if len(audio_array) < SAMPLE_RATE * 0.5:
+            logger.warning("Recording too short, ignoring.")
+            return
+
+        # Save to temporary file
+        AUDIO_DIR.mkdir(parents=True, exist_ok=True)
+        temp_audio_path = AUDIO_DIR / f"recording_{int(time.time())}.wav"
+
+        sf.write(str(temp_audio_path), audio_array, SAMPLE_RATE)
+        logger.info(f"Audio saved: {temp_audio_path}")
+
+        # Transcribe
+        transcribed_text = transcribe_audio(temp_audio_path)
+
+        if transcribed_text:
+            # Send to API
+            send_transcribed_text(transcribed_text)
+
+    except Exception as e:
+        logger.error(f"Error processing audio: {e}")
+    finally:
+        # Cleanup temp file
+        try:
+            if 'temp_audio_path' in locals() and temp_audio_path.exists():
+                temp_audio_path.unlink()
+        except Exception:
+            pass
+
+
+def transcribe_audio(audio_path):
+    """Transcribe audio file using Whisper."""
+    try:
+        model = get_whisper_model()
+        if model is None:
+            return None
+
+        logger.info("Transcribing audio...")
+        segments, info = model.transcribe(str(audio_path), beam_size=5)
+
+        # Combine all segments
+        text = " ".join([segment.text.strip() for segment in segments])
+
+        if text:
+            logger.info(f"Transcription: {text[:100]}{'...' if len(text) > 100 else ''}")
+        else:
+            logger.warning("Transcription returned empty text.")
+
+        return text
+
+    except Exception as e:
+        logger.error(f"Transcription error: {e}")
+        return None
+
+
+def send_transcribed_text(text):
+    """Send transcribed text to the Code tab API."""
+    if not API_TOKEN:
+        logger.error("No API token provided!")
+        return
+
+    if not text or not text.strip():
+        logger.warning("No text to send.")
+        return
+
+    try:
+        response = requests.post(
+            f"{API_URL}/chat",
+            headers={
+                "Authorization": f"Bearer {API_TOKEN}",
+                "Content-Type": "application/json"
+            },
+            json={"message": text}
+        )
+
+        if response.status_code == 200:
+            logger.info("Transcribed text sent successfully.")
+        else:
+            logger.error(f"Failed to send text: {response.text}")
+    except Exception as e:
+        logger.error(f"Error sending transcribed text: {e}")
+
+
 # ============ Keyboard Handlers ============
 
 def on_press(key):
+    global last_esc_time, is_recording
+
     current_keys.add(key)
+
     try:
-        if key == keyboard.Key.down and keyboard.Key.esc in current_keys:
+        # Double-tap ESC detection for voice recording
+        if key == keyboard.Key.esc:
+            current_time = time.time()
+            time_since_last = current_time - last_esc_time
+
+            if time_since_last < DOUBLE_TAP_THRESHOLD and not is_recording:
+                # Double-tap detected - start recording
+                start_voice_recording()
+
+            last_esc_time = current_time
+
+        # Other hotkeys (ESC + arrow keys)
+        elif key == keyboard.Key.down and keyboard.Key.esc in current_keys:
             logger.info("Capturing screenshot...")
             capture_screenshot()
         elif key == keyboard.Key.up and keyboard.Key.esc in current_keys:
             logger.info("Sending all screenshots...")
             send_screenshots()
+        elif key == keyboard.Key.right and keyboard.Key.esc in current_keys:
+            logger.info("Sending clipboard text to Code tab...")
+            send_clipboard_text()
     except AttributeError:
         pass
 
 
 def on_release(key):
+    global is_recording
+
     try:
         current_keys.remove(key)
     except KeyError:
         pass
 
+    # Stop voice recording when ESC is released
+    if is_recording and key == keyboard.Key.esc:
+        # Run transcription in background thread to not block keyboard listener
+        threading.Thread(target=stop_voice_recording_and_send, daemon=True).start()
+
 
 # ============ CLI Commands ============
 
@@ -336,6 +569,8 @@ def cmd_start(args):
     logger.info(f"Server: {server_mode} ({API_URL})")
     logger.info("Press ESC + Down to capture a screenshot.")
     logger.info("Press ESC + Up to send all stored screenshots.")
+    logger.info("Press ESC + Right to send clipboard text to Code tab.")
+    logger.info("Double-tap ESC (hold on 2nd) to record voice and send transcription.")
     if not is_daemon:
         logger.info("Running... (Press Ctrl + C to exit)")
 
{ai_screenshooter-1.3.0 → ai_screenshooter-1.7.0}/setup.py

@@ -2,14 +2,19 @@ from setuptools import setup, find_packages
 
 setup(
     name="ai-screenshooter",
-    version="1.3.0",
+    version="1.7.0",
     packages=find_packages(),
     py_modules=["ai_screenshot"],
     install_requires=[
         "pynput",
         "requests",
         "Pillow",
-        "pygetwindow"
+        "pygetwindow",
+        "pyperclip",
+        "sounddevice",
+        "soundfile",
+        "numpy",
+        "faster-whisper"
     ],
     entry_points={
        "console_scripts": [
{ai_screenshooter-1.3.0 → ai_screenshooter-1.7.0}/README.md RENAMED: file without changes
{ai_screenshooter-1.3.0 → ai_screenshooter-1.7.0}/ai_screenshooter.egg-info/SOURCES.txt RENAMED: file without changes
{ai_screenshooter-1.3.0 → ai_screenshooter-1.7.0}/ai_screenshooter.egg-info/dependency_links.txt RENAMED: file without changes
{ai_screenshooter-1.3.0 → ai_screenshooter-1.7.0}/ai_screenshooter.egg-info/entry_points.txt RENAMED: file without changes
{ai_screenshooter-1.3.0 → ai_screenshooter-1.7.0}/ai_screenshooter.egg-info/top_level.txt RENAMED: file without changes
{ai_screenshooter-1.3.0 → ai_screenshooter-1.7.0}/setup.cfg RENAMED: file without changes