npcpy 1.2.34__py3-none-any.whl → 1.2.36__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
npcpy/data/audio.py CHANGED
@@ -175,6 +175,41 @@ def run_transcription(audio_np):
175
175
  return None
176
176
 
177
177
 
178
def transcribe_audio_file(file_path: str, language=None) -> str:
    """
    Transcribe an audio file to text using whichever local backend imports:
    faster-whisper first, then openai/whisper. Returns "" when no backend
    is available or nothing was transcribed.
    """
    # Backend 1: faster-whisper, on CUDA when torch reports a usable GPU.
    try:
        from faster_whisper import WhisperModel  # type: ignore

        compute_device = "cpu"
        try:
            import torch  # type: ignore
            if torch.cuda.is_available():
                compute_device = "cuda"
        except Exception:
            pass

        fw_model = WhisperModel("small", device=compute_device)
        fw_segments, _ = fw_model.transcribe(file_path, language=language, beam_size=5)
        pieces = [seg.text.strip() for seg in fw_segments if seg.text]
        joined = " ".join(pieces).strip()
        if joined:
            return joined
    except Exception:
        pass

    # Backend 2: the reference openai/whisper implementation.
    try:
        import whisper  # type: ignore

        ow_model = whisper.load_model("small")
        ow_result = ow_model.transcribe(file_path, language=language)
        joined = ow_result.get("text", "").strip()
        if joined:
            return joined
    except Exception:
        pass

    # No backend produced text.
    return ""
211
+
212
+
178
213
 
179
214
  def load_history():
180
215
  global history
@@ -431,4 +466,3 @@ def process_text_for_tts(text):
431
466
  text = re.sub(r"([.!?])(\w)", r"\1 \2", text)
432
467
  return text
433
468
 
434
-
npcpy/data/load.py CHANGED
@@ -4,8 +4,10 @@ import json
4
4
  import io
5
5
  from PIL import Image
6
6
  import numpy as np
7
- from typing import Optional
7
+ from typing import Optional, List
8
8
  import os
9
+ import tempfile
10
+ import subprocess
9
11
 
10
12
  try:
11
13
  from docx import Document
@@ -90,12 +92,17 @@ extension_map = {
90
92
  "JPEG": "images",
91
93
  "GIF": "images",
92
94
  "SVG": "images",
95
+ "WEBP": "images",
96
+ "BMP": "images",
97
+ "TIFF": "images",
93
98
  "MP4": "videos",
94
99
  "AVI": "videos",
95
100
  "MOV": "videos",
96
101
  "WMV": "videos",
97
102
  "MPG": "videos",
98
103
  "MPEG": "videos",
104
+ "WEBM": "videos",
105
+ "MKV": "videos",
99
106
  "DOCX": "documents",
100
107
  "PPTX": "documents",
101
108
  "PDF": "documents",
@@ -105,6 +112,12 @@ extension_map = {
105
112
  "MD": "documents",
106
113
  "HTML": "documents",
107
114
  "HTM": "documents",
115
+ "MP3": "audio",
116
+ "WAV": "audio",
117
+ "M4A": "audio",
118
+ "AAC": "audio",
119
+ "FLAC": "audio",
120
+ "OGG": "audio",
108
121
  "ZIP": "archives",
109
122
  "RAR": "archives",
110
123
  "7Z": "archives",
@@ -112,6 +125,136 @@ extension_map = {
112
125
  "GZ": "archives",
113
126
  }
114
127
 
128
+ def _chunk_text(full_content: str, chunk_size: int) -> List[str]:
129
+ """Split long content into reasonably sized chunks for model input."""
130
+ chunks = []
131
+ for i in range(0, len(full_content), chunk_size):
132
+ chunk = full_content[i:i+chunk_size].strip()
133
+ if chunk:
134
+ chunks.append(chunk)
135
+ return chunks
136
+
137
+ def _transcribe_audio(file_path: str, language: Optional[str] = None) -> str:
138
+ """
139
+ Best-effort audio transcription using optional dependencies.
140
+ Tries faster-whisper, then openai/whisper. Falls back to metadata only.
141
+ """
142
+ # Prefer the existing audio module helper if present
143
+ try:
144
+ from npcpy.data.audio import transcribe_audio_file # type: ignore
145
+ text = transcribe_audio_file(file_path, language=language)
146
+ if text:
147
+ return text
148
+ except Exception:
149
+ pass
150
+
151
+ # Try faster-whisper first
152
+ try:
153
+ from faster_whisper import WhisperModel
154
+ try:
155
+ import torch
156
+ device = "cuda" if torch.cuda.is_available() else "cpu"
157
+ except Exception:
158
+ device = "cpu"
159
+ model = WhisperModel("small", device=device)
160
+ segments, _ = model.transcribe(file_path, language=language, beam_size=5)
161
+ return " ".join(seg.text.strip() for seg in segments if seg.text).strip()
162
+ except Exception:
163
+ pass
164
+
165
+ # Fallback: openai/whisper
166
+ try:
167
+ import whisper
168
+ model = whisper.load_model("small")
169
+ result = model.transcribe(file_path, language=language)
170
+ return result.get("text", "").strip()
171
+ except Exception:
172
+ pass
173
+
174
+ # Last resort metadata message
175
+ return f"[Audio file at {file_path}; install faster-whisper or whisper for transcription]"
176
+
177
def load_audio(file_path: str, language: Optional[str] = None) -> str:
    """
    Return the transcript of the audio file at *file_path*, or a bracketed
    placeholder string when transcription produced no text.
    """
    transcript = _transcribe_audio(file_path, language=language)
    # An empty transcript is falsy, so `or` selects the placeholder.
    return transcript or f"[Audio file at {file_path}; no transcript available]"
183
+
184
+ def _extract_audio_from_video(file_path: str, max_duration: int = 600) -> Optional[str]:
185
+ """
186
+ Use ffmpeg to dump the audio track from a video into a temp wav for transcription.
187
+ Returns the temp path or None.
188
+ """
189
+ try:
190
+ temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
191
+ temp_audio.close()
192
+ cmd = [
193
+ "ffmpeg",
194
+ "-y",
195
+ "-i",
196
+ file_path,
197
+ "-vn",
198
+ "-ac",
199
+ "1",
200
+ "-ar",
201
+ "16000",
202
+ "-t",
203
+ str(max_duration),
204
+ temp_audio.name,
205
+ ]
206
+ subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
207
+ return temp_audio.name
208
+ except Exception:
209
+ return None
210
+
211
def load_video(file_path: str, language: Optional[str] = None, max_audio_seconds: int = 600) -> str:
    """
    Describe a video as text: delegate to npcpy.data.video when available,
    otherwise report basic metadata plus a best-effort audio transcript.
    """
    # Preferred path: the dedicated helper module, when importable and working.
    try:
        from npcpy.data.video import summarize_video_file  # type: ignore
        return summarize_video_file(file_path, language=language, max_audio_seconds=max_audio_seconds)
    except Exception:
        pass

    lines = []

    # Metadata via OpenCV when available; otherwise just the file name.
    try:
        import cv2

        cap = cv2.VideoCapture(file_path)
        fps = cap.get(cv2.CAP_PROP_FPS)
        frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
        duration = frame_count / fps if fps else 0
        lines.append(
            f"Video file: {os.path.basename(file_path)} | {width}x{height} | {fps:.2f} fps | {frame_count} frames | ~{duration:.1f}s"
        )
        cap.release()
    except Exception:
        lines.append(f"Video file: {os.path.basename(file_path)}")

    # Best-effort transcript of the audio track; always delete the temp wav.
    transcript = ""
    wav_path = _extract_audio_from_video(file_path, max_duration=max_audio_seconds)
    if wav_path:
        try:
            transcript = _transcribe_audio(wav_path, language=language)
        finally:
            try:
                os.remove(wav_path)
            except Exception:
                pass

    if transcript:
        lines.append("Audio transcript:")
        lines.append(transcript)
    else:
        lines.append("[No transcript extracted; ensure ffmpeg and faster-whisper/whisper are installed]")

    return "\n".join(lines)
257
+
115
258
  def load_file_contents(file_path, chunk_size=None):
116
259
  file_ext = os.path.splitext(file_path)[1].upper().lstrip('.')
117
260
  full_content = ""
@@ -137,18 +280,17 @@ def load_file_contents(file_path, chunk_size=None):
137
280
  elif file_ext == 'JSON':
138
281
  data = load_json(file_path)
139
282
  full_content = json.dumps(data, indent=2)
283
+ elif file_ext in ['MP3', 'WAV', 'M4A', 'AAC', 'FLAC', 'OGG']:
284
+ full_content = load_audio(file_path)
285
+ elif file_ext in ['MP4', 'AVI', 'MOV', 'WMV', 'MPG', 'MPEG', 'WEBM', 'MKV']:
286
+ full_content = load_video(file_path)
140
287
  else:
141
288
  return [f"Unsupported file format for content loading: {file_ext}"]
142
289
 
143
290
  if not full_content:
144
291
  return []
145
292
 
146
- chunks = []
147
- for i in range(0, len(full_content), chunk_size):
148
- chunk = full_content[i:i+chunk_size].strip()
149
- if chunk:
150
- chunks.append(chunk)
151
- return chunks
293
+ return _chunk_text(full_content, chunk_size)
152
294
 
153
295
  except Exception as e:
154
296
  return [f"Error loading file {file_path}: {str(e)}"]
npcpy/data/video.py CHANGED
@@ -1,4 +1,7 @@
1
1
 
2
+ import os
3
+ import tempfile
4
+ import subprocess
2
5
 
3
6
 
4
7
  def process_video(file_path, table_name):
@@ -26,3 +29,72 @@ def process_video(file_path, table_name):
26
29
  except Exception as e:
27
30
  print(f"Error processing video: {e}")
28
31
  return [], []
32
+
33
+
34
def summarize_video_file(file_path: str, language: str = None, max_audio_seconds: int = 600) -> str:
    """
    Summarize a video as text: stream metadata (via OpenCV, when installed)
    plus a best-effort transcript of the audio track.

    The audio track is extracted with ffmpeg into a temporary mono 16 kHz
    WAV, capped at *max_audio_seconds* seconds, and transcribed with
    npcpy.data.audio.transcribe_audio_file when available. The temp file
    is always removed — including when ffmpeg itself fails, since
    NamedTemporaryFile(delete=False) has already created it on disk and
    the previous code leaked it in that branch.
    """
    meta_bits = []

    # Metadata via OpenCV; fall back to just the file name if cv2 is absent/fails.
    try:
        import cv2  # type: ignore

        video = cv2.VideoCapture(file_path)
        fps = video.get(cv2.CAP_PROP_FPS)
        frame_count = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
        width = int(video.get(cv2.CAP_PROP_FRAME_WIDTH))
        height = int(video.get(cv2.CAP_PROP_FRAME_HEIGHT))
        duration = frame_count / fps if fps else 0
        meta_bits.append(
            f"Video file: {os.path.basename(file_path)} | {width}x{height} | {fps:.2f} fps | {frame_count} frames | ~{duration:.1f}s"
        )
        video.release()
    except Exception:
        meta_bits.append(f"Video file: {os.path.basename(file_path)}")

    # Extract the audio track with ffmpeg if available.
    audio_path = None
    temp_name = None
    try:
        temp_audio = tempfile.NamedTemporaryFile(delete=False, suffix=".wav")
        temp_audio.close()
        temp_name = temp_audio.name
        cmd = [
            "ffmpeg",
            "-y",             # overwrite the already-created temp file
            "-i", file_path,
            "-vn",            # drop the video stream
            "-ac", "1",       # mono
            "-ar", "16000",   # 16 kHz sample rate
            "-t", str(max_audio_seconds),
            temp_name,
        ]
        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        audio_path = temp_name
    except Exception:
        # ffmpeg missing or failed: remove the pre-created temp file so it
        # does not leak, then proceed without a transcript.
        if temp_name:
            try:
                os.remove(temp_name)
            except OSError:
                pass
        audio_path = None

    transcript = ""
    if audio_path:
        try:
            try:
                from npcpy.data.audio import transcribe_audio_file
                transcript = transcribe_audio_file(audio_path, language=language)  # type: ignore
            except Exception:
                transcript = ""
        finally:
            try:
                os.remove(audio_path)
            except Exception:
                pass

    if transcript:
        meta_bits.append("Audio transcript:")
        meta_bits.append(transcript)
    else:
        meta_bits.append("[No transcript extracted; ensure ffmpeg and a transcription backend are installed]")

    return "\n".join(meta_bits)