npcpy 1.2.34__py3-none-any.whl → 1.2.36__py3-none-any.whl

This diff compares two publicly released versions of the package as they appear in their public registry, and is provided for informational purposes only.
npcpy/gen/ocr.py ADDED
@@ -0,0 +1,187 @@
+ """
+ Utilities for running DeepSeek OCR (via Unsloth) to turn images into text.
+
+ This is intentionally lightweight: the model is only downloaded/loaded when
+ `DeepSeekOCR.run` is called. You can point `model_id` at a local path or a
+ Hugging Face repo ID; we default to the public `unsloth/DeepSeek-OCR`.
+ """
+
+ from __future__ import annotations
+
+ import os
+ import tempfile
+ from dataclasses import dataclass
+ from typing import Optional, Union
+
+ try:
+     from PIL import Image
+ except ImportError:
+     Image = None  # Delayed import for lightweight environments
+
+ ImageInput = Union[str, bytes, "Image.Image"]
+
+
+ @dataclass
+ class DeepSeekOCR:
+     """Lazy loader/wrapper around the Unsloth DeepSeek OCR vision model."""
+
+     model_id: str = "unsloth/DeepSeek-OCR"
+     local_dir: str = os.path.expanduser("~/.npcsh/models/deepseek_ocr")
+     load_in_4bit: bool = False
+     base_size: int = 1024
+     image_size: int = 640
+     crop_mode: bool = True
+
+     def __post_init__(self) -> None:
+         self._model = None
+         self._tokenizer = None
+
+     def _ensure_weights(self) -> str:
+         """Download weights if they are not already on-disk."""
+         if os.path.isdir(self.local_dir) and os.listdir(self.local_dir):
+             return self.local_dir
+
+         os.makedirs(self.local_dir, exist_ok=True)
+         try:
+             from huggingface_hub import snapshot_download
+         except ImportError as exc:
+             raise ImportError(
+                 "huggingface_hub is required to download DeepSeek OCR weights. "
+                 "Install with `pip install huggingface_hub` or pre-download manually."
+             ) from exc
+
+         snapshot_download(self.model_id, local_dir=self.local_dir)
+         return self.local_dir
+
+     def _load_model(self) -> None:
+         """Load the Unsloth vision model once (lazy)."""
+         if self._model is not None and self._tokenizer is not None:
+             return
+
+         weights_dir = self._ensure_weights()
+         os.environ.setdefault("UNSLOTH_WARN_UNINITIALIZED", "0")
+
+         try:
+             from unsloth import FastVisionModel
+             from transformers import AutoModel
+         except ImportError as exc:
+             raise ImportError(
+                 "unsloth and transformers are required to run DeepSeek OCR. "
+                 "Install with `pip install unsloth transformers` (and bitsandbytes if using 4bit)."
+             ) from exc
+
+         self._model, self._tokenizer = FastVisionModel.from_pretrained(
+             weights_dir,
+             load_in_4bit=self.load_in_4bit,
+             auto_model=AutoModel,
+             trust_remote_code=True,
+             unsloth_force_compile=True,
+             use_gradient_checkpointing="unsloth",
+         )
+
+     def _prepare_image_file(self, image: ImageInput) -> tuple[str, bool]:
+         """Normalize various image inputs to a file path and say if we should clean it up."""
+         if isinstance(image, str):
+             if not os.path.exists(image):
+                 raise FileNotFoundError(f"Image path does not exist: {image}")
+             return image, False
+
+         if Image is None:
+             raise ImportError("Pillow is required for OCR image handling. Install with `pip install pillow`.")
+
+         if isinstance(image, bytes):
+             import io
+
+             pil = Image.open(io.BytesIO(image)).convert("RGB")
+         elif isinstance(image, Image.Image):
+             pil = image.convert("RGB")
+         else:
+             raise TypeError(f"Unsupported image input type: {type(image)}")
+
+         tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".png")
+         pil.save(tmp, format="PNG")
+         tmp.close()
+         return tmp.name, True
+
+     def run(
+         self,
+         image: ImageInput,
+         prompt: str = "<image>\nFree OCR. ",
+         output_path: Optional[str] = None,
+         save_results: bool = False,
+         test_compress: bool = False,
+         **kwargs,
+     ) -> str:
+         """
+         Run OCR on an image and return the recognized text.
+
+         Args:
+             image: Path, bytes, or PIL Image.
+             prompt: Prompt passed to the vision model (keeps the default used
+                 in the reference notebook).
+             output_path: Optional directory for saving debug outputs.
+             save_results: If True, Unsloth will save visualization artifacts.
+             test_compress: Forwarded to `model.infer`.
+             kwargs: Additional overrides for infer (base_size, image_size, etc).
+         """
+         self._load_model()
+
+         image_file, should_cleanup = self._prepare_image_file(image)
+         infer_kwargs = {
+             "prompt": prompt,
+             "image_file": image_file,
+             "output_path": output_path or "",
+             "base_size": kwargs.pop("base_size", self.base_size),
+             "image_size": kwargs.pop("image_size", self.image_size),
+             "crop_mode": kwargs.pop("crop_mode", self.crop_mode),
+             "save_results": save_results,
+             "test_compress": test_compress,
+         }
+
+         try:
+             result = self._model.infer(self._tokenizer, **infer_kwargs)
+         finally:
+             # Clean up temp files created from bytes/PIL inputs.
+             if should_cleanup and os.path.exists(image_file):
+                 try:
+                     os.remove(image_file)
+                 except OSError:
+                     pass
+
+         # Unsloth infer returns a dict-like object; stringify for callers.
+         if isinstance(result, str):
+             return result.strip()
+         if isinstance(result, dict) and "text" in result:
+             return str(result["text"]).strip()
+         return str(result).strip()
+
+
+ def deepseek_ocr(
+     image: ImageInput,
+     prompt: str = "<image>\nFree OCR. ",
+     model_id: str = "unsloth/DeepSeek-OCR",
+     local_dir: Optional[str] = None,
+     **kwargs,
+ ) -> str:
+     """
+     Functional wrapper that mirrors the reference notebook defaults.
+
+     Example:
+         text = deepseek_ocr(\"invoice.png\")
+     """
+     runner = DeepSeekOCR(
+         model_id=model_id,
+         local_dir=local_dir or os.path.expanduser("~/.npcsh/models/deepseek_ocr"),
+         load_in_4bit=kwargs.pop("load_in_4bit", False),
+         base_size=kwargs.pop("base_size", 1024),
+         image_size=kwargs.pop("image_size", 640),
+         crop_mode=kwargs.pop("crop_mode", True),
+     )
+     return runner.run(
+         image=image,
+         prompt=prompt,
+         output_path=kwargs.pop("output_path", None),
+         save_results=kwargs.pop("save_results", False),
+         test_compress=kwargs.pop("test_compress", False),
+         **kwargs,
+     )
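
The new module exposes both a class-based and a one-shot functional entry point. A minimal usage sketch, assuming the optional `unsloth`, `transformers`, and `pillow` dependencies are installed (the file names here are placeholders):

```python
# Sketch only: exercises the new npcpy/gen/ocr.py module added above.
# The first call downloads weights to ~/.npcsh/models/deepseek_ocr by default.
from npcpy.gen.ocr import DeepSeekOCR, deepseek_ocr

# One-shot functional wrapper:
text = deepseek_ocr("invoice.png")  # "invoice.png" is a placeholder path
print(text)

# Reusable wrapper: the model loads lazily on the first run() and is cached
# on the instance, so later calls skip the download/load.
ocr = DeepSeekOCR(load_in_4bit=True)
for path in ["page1.png", "page2.png"]:
    print(ocr.run(path))
```
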
@@ -405,9 +405,13 @@ def save_kg_to_db(engine: Engine, kg_data: Dict[str, Any], team_name: str, npc_n
  def generate_message_id() -> str:
      return str(uuid.uuid4())

+
+
+ from sqlalchemy import event, Table, Column, Integer, String, Text
+ from sqlalchemy.orm import mapper
+
  class CommandHistory:
      def __init__(self, db: Union[str, Engine] = "~/npcsh_history.db"):
-
          if isinstance(db, str):
              self.engine = create_engine_from_path(db)
              self.db_path = db
@@ -415,15 +419,54 @@ class CommandHistory:
              self.engine = db
              self.db_path = str(db.url)
          else:
-             raise TypeError(f"Unsupported type for CommandHistory db parameter: {type(db)}")
+             raise TypeError(f"Unsupported type: {type(db)}")

          self._initialize_schema()
-
+         self._setup_execution_triggers()
+         self.backfill_execution_tables()
+     def backfill_execution_tables(self):
+         with self.engine.begin() as conn:
+             conn.execute(text("""
+                 INSERT OR IGNORE INTO jinx_executions
+                 (message_id, jinx_name, input, timestamp, npc, team,
+                  conversation_id)
+                 SELECT
+                     message_id,
+                     SUBSTR(content, 2,
+                         CASE
+                             WHEN INSTR(SUBSTR(content, 2), ' ') > 0
+                             THEN INSTR(SUBSTR(content, 2), ' ') - 1
+                             ELSE LENGTH(content) - 1
+                         END
+                     ),
+                     content,
+                     timestamp,
+                     npc,
+                     team,
+                     conversation_id
+                 FROM conversation_history
+                 WHERE role = 'user' AND content LIKE '/%'
+             """))
+
+             conn.execute(text("""
+                 INSERT OR IGNORE INTO npc_executions
+                 (message_id, input, timestamp, npc, team, conversation_id,
+                  model, provider)
+                 SELECT
+                     message_id,
+                     content,
+                     timestamp,
+                     npc,
+                     team,
+                     conversation_id,
+                     model,
+                     provider
+                 FROM conversation_history
+                 WHERE role = 'user' AND npc IS NOT NULL
+             """))
      def _initialize_schema(self):
-         """Creates all necessary tables."""
          metadata = MetaData()

-
          Table('command_history', metadata,
              Column('id', Integer, primary_key=True, autoincrement=True),
              Column('timestamp', String(50)),
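
The `SUBSTR`/`INSTR` expression in the backfill (and in the triggers later in this diff) pulls the command name out of a slash command. An illustrative Python rendering of just that expression, not part of the package:

```python
# Illustrative only: Python equivalent of the SQL jinx-name extraction above.
def extract_jinx_name(content: str) -> str:
    body = content[1:]        # SUBSTR(content, 2): drop the leading '/'
    space = body.find(" ")    # INSTR(...) is 1-based; find() is 0-based
    # WHEN INSTR(...) > 0: take everything before the first space;
    # ELSE: take the whole remainder (LENGTH(content) - 1 characters).
    return body[:space] if space != -1 else body

assert extract_jinx_name("/search cats on mars") == "search"
assert extract_jinx_name("/help") == "help"
```
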
@@ -433,7 +476,6 @@ class CommandHistory:
              Column('location', Text)
          )

-
          Table('conversation_history', metadata,
              Column('id', Integer, primary_key=True, autoincrement=True),
              Column('message_id', String(50), unique=True, nullable=False),
@@ -448,33 +490,48 @@ class CommandHistory:
              Column('team', String(100))
          )

-
          Table('message_attachments', metadata,
              Column('id', Integer, primary_key=True, autoincrement=True),
-             Column('message_id', String(50), ForeignKey('conversation_history.message_id', ondelete='CASCADE'), nullable=False),
+             Column('message_id', String(50),
+                    ForeignKey('conversation_history.message_id',
+                               ondelete='CASCADE'),
+                    nullable=False),
              Column('attachment_name', String(255)),
              Column('attachment_type', String(100)),
              Column('attachment_data', LargeBinary),
              Column('attachment_size', Integer),
              Column('upload_timestamp', String(50)),
-             Column('file_path', Text)
+             Column('file_path', Text)
+         )
+
+         Table('labels', metadata,
+             Column('id', Integer, primary_key=True, autoincrement=True),
+             Column('entity_type', String(50), nullable=False),
+             Column('entity_id', String(100), nullable=False),
+             Column('label', String(100), nullable=False),
+             Column('metadata', Text),
+             Column('created_at', DateTime, default=func.now())
+         )
+
+         Table('jinx_executions', metadata,
+             Column('message_id', String(50), primary_key=True),
+             Column('jinx_name', String(100)),
+             Column('input', Text),
+             Column('timestamp', String(50)),
+             Column('npc', String(100)),
+             Column('team', String(100)),
+             Column('conversation_id', String(100))
          )

-
-         Table('jinx_execution_log', metadata,
-             Column('execution_id', Integer, primary_key=True, autoincrement=True),
-             Column('triggering_message_id', String(50), ForeignKey('conversation_history.message_id', ondelete='CASCADE'), nullable=False),
-             Column('response_message_id', String(50), ForeignKey('conversation_history.message_id', ondelete='SET NULL')),
-             Column('conversation_id', String(100), nullable=False),
-             Column('timestamp', String(50), nullable=False),
-             Column('npc_name', String(100)),
-             Column('team_name', String(100)),
-             Column('jinx_name', String(100), nullable=False),
-             Column('jinx_inputs', Text),
-             Column('jinx_output', Text),
-             Column('status', String(50), nullable=False),
-             Column('error_message', Text),
-             Column('duration_ms', Integer)
+         Table('npc_executions', metadata,
+             Column('message_id', String(50), primary_key=True),
+             Column('input', Text),
+             Column('timestamp', String(50)),
+             Column('npc', String(100)),
+             Column('team', String(100)),
+             Column('conversation_id', String(100)),
+             Column('model', String(100)),
+             Column('provider', String(100))
          )

          Table('memory_lifecycle', metadata,
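
The wide `jinx_execution_log` table is dropped in favor of two narrower tables keyed by `message_id`. Once they are populated by the backfill and triggers, they can be inspected directly; a sketch against the default SQLite history database (path taken from the `CommandHistory` default above):

```python
# Sketch: count executions per jinx in the new table. Assumes the default
# SQLite backend at ~/npcsh_history.db; adjust for other engines.
import os
import sqlite3

con = sqlite3.connect(os.path.expanduser("~/npcsh_history.db"))
for name, count in con.execute(
    "SELECT jinx_name, COUNT(*) FROM jinx_executions GROUP BY jinx_name"
):
    print(name, count)
con.close()
```
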
@@ -492,30 +549,137 @@ class CommandHistory:
              Column('provider', String(100)),
              Column('created_at', DateTime, default=func.now())
          )
-

          metadata.create_all(self.engine, checkfirst=True)
+         init_kg_schema(self.engine)
+
+     def _setup_execution_triggers(self):
+         if 'sqlite' in str(self.engine.url):
+             with self.engine.begin() as conn:
+                 conn.execute(text("""
+                     CREATE TRIGGER IF NOT EXISTS populate_jinx_executions
+                     AFTER INSERT ON conversation_history
+                     WHEN NEW.role = 'user' AND NEW.content LIKE '/%'
+                     BEGIN
+                         INSERT OR IGNORE INTO jinx_executions
+                         (message_id, jinx_name, input, timestamp, npc, team,
+                          conversation_id)
+                         VALUES (
+                             NEW.message_id,
+                             SUBSTR(NEW.content, 2,
+                                 CASE
+                                     WHEN INSTR(SUBSTR(NEW.content, 2), ' ') > 0
+                                     THEN INSTR(SUBSTR(NEW.content, 2), ' ') - 1
+                                     ELSE LENGTH(NEW.content) - 1
+                                 END
+                             ),
+                             NEW.content,
+                             NEW.timestamp,
+                             NEW.npc,
+                             NEW.team,
+                             NEW.conversation_id
+                         );
+                     END
+                 """))
+
+                 conn.execute(text("""
+                     CREATE TRIGGER IF NOT EXISTS populate_npc_executions
+                     AFTER INSERT ON conversation_history
+                     WHEN NEW.role = 'user' AND NEW.npc IS NOT NULL
+                     BEGIN
+                         INSERT OR IGNORE INTO npc_executions
+                         (message_id, input, timestamp, npc, team,
+                          conversation_id, model, provider)
+                         VALUES (
+                             NEW.message_id,
+                             NEW.content,
+                             NEW.timestamp,
+                             NEW.npc,
+                             NEW.team,
+                             NEW.conversation_id,
+                             NEW.model,
+                             NEW.provider
+                         );
+                     END
+                 """))
+
+     def get_jinx_executions(self, jinx_name: str = None, limit: int = 1000) -> List[Dict]:
+         if jinx_name:
+             stmt = """
+                 SELECT je.*, l.label
+                 FROM jinx_executions je
+                 LEFT JOIN labels l ON l.entity_type = 'message'
+                     AND l.entity_id = je.message_id
+                 WHERE je.jinx_name = :jinx_name
+                 ORDER BY je.timestamp DESC
+                 LIMIT :limit
+             """
+             return self._fetch_all(stmt, {"jinx_name": jinx_name, "limit": limit})

+         stmt = """
+             SELECT je.*, l.label
+             FROM jinx_executions je
+             LEFT JOIN labels l ON l.entity_type = 'message'
+                 AND l.entity_id = je.message_id
+             ORDER BY je.timestamp DESC
+             LIMIT :limit
+         """
+         return self._fetch_all(stmt, {"limit": limit})
+
+     def get_npc_executions(self, npc_name: str, limit: int = 1000) -> List[Dict]:
+         stmt = """
+             SELECT ne.*, l.label
+             FROM npc_executions ne
+             LEFT JOIN labels l ON l.entity_type = 'message'
+                 AND l.entity_id = ne.message_id
+             WHERE ne.npc = :npc_name
+             ORDER BY ne.timestamp DESC
+             LIMIT :limit
+         """
+         return self._fetch_all(stmt, {"npc_name": npc_name, "limit": limit})
+
+     def label_execution(self, message_id: str, label: str):
+         self.add_label('message', message_id, label)

+     def add_label(self, entity_type: str, entity_id: str, label: str, metadata: dict = None):
+         stmt = """
+             INSERT INTO labels (entity_type, entity_id, label, metadata)
+             VALUES (:entity_type, :entity_id, :label, :metadata)
+         """
          with self.engine.begin() as conn:
-
-             index_queries = [
-                 "CREATE INDEX IF NOT EXISTS idx_jinx_log_trigger_msg ON jinx_execution_log (triggering_message_id)",
-                 "CREATE INDEX IF NOT EXISTS idx_jinx_log_convo_id ON jinx_execution_log (conversation_id)",
-                 "CREATE INDEX IF NOT EXISTS idx_jinx_log_jinx_name ON jinx_execution_log (jinx_name)",
-                 "CREATE INDEX IF NOT EXISTS idx_jinx_log_timestamp ON jinx_execution_log (timestamp)"
-             ]
-
-             for idx_query in index_queries:
-                 try:
-                     conn.execute(text(idx_query))
-                 except SQLAlchemyError:
-
-                     pass
+             conn.execute(text(stmt), {
+                 "entity_type": entity_type,
+                 "entity_id": entity_id,
+                 "label": label,
+                 "metadata": json.dumps(metadata) if metadata else None
+             })
+
+     def get_labels(self, entity_type: str = None, label: str = None) -> List[Dict]:
+         conditions = []
+         params = {}

+         if entity_type:
+             conditions.append("entity_type = :entity_type")
+             params["entity_type"] = entity_type
+         if label:
+             conditions.append("label = :label")
+             params["label"] = label

-         init_kg_schema(self.engine)
+         where = f"WHERE {' AND '.join(conditions)}" if conditions else ""
+         stmt = f"SELECT * FROM labels {where} ORDER BY created_at DESC"
+
+         return self._fetch_all(stmt, params)

+     def get_training_data_by_label(self, label: str = 'training') -> List[Dict]:
+         stmt = """
+             SELECT l.entity_type, l.entity_id, l.metadata,
+                    ch.content, ch.role, ch.npc, ch.conversation_id
+             FROM labels l
+             LEFT JOIN conversation_history ch ON
+                 (l.entity_type = 'message' AND l.entity_id = ch.message_id)
+             WHERE l.label = :label
+         """
+         return self._fetch_all(stmt, {"label": label})
      def _execute_returning_id(self, stmt: str, params: Dict = None) -> Optional[int]:
          """Execute INSERT and return the generated ID"""
          with self.engine.begin() as conn:
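
Together these methods give `CommandHistory` a small labeling and retrieval API over the new tables. A usage sketch; the import path and the `message_id` value are assumptions, not taken from this diff:

```python
# Sketch of the labeling/query API added in this release.
from npcpy.memory.command_history import CommandHistory  # import path assumed

history = CommandHistory("~/npcsh_history.db")

# Tag a message so it can be pulled into a training set later.
history.label_execution("hypothetical-message-id", "training")

# Retrieve labeled rows and recent executions.
training_rows = history.get_training_data_by_label("training")
recent_jinxes = history.get_jinx_executions(limit=20)
searches = history.get_jinx_executions(jinx_name="search")
```
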
@@ -535,6 +699,7 @@ class CommandHistory:
              result = conn.execute(text(stmt), params or {})
              return [dict(row._mapping) for row in result]

+
      def add_command(self, command, subcommands, output, location):
          timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
          stmt = """
@@ -1092,6 +1257,32 @@ def start_new_conversation(prepend: str = None) -> str:
          prepend = 'npcsh'
      return f"{prepend}_{datetime.now().strftime('%Y%m%d%H%M%S')}"

+
+ def format_memory_context(memory_examples):
+     if not memory_examples:
+         return ""
+
+     context_parts = []
+
+     approved_examples = memory_examples.get("approved", [])
+     rejected_examples = memory_examples.get("rejected", [])
+
+     if approved_examples:
+         context_parts.append("EXAMPLES OF GOOD MEMORIES:")
+         for ex in approved_examples[:5]:
+             final = ex.get("final_memory") or ex.get("initial_memory")
+             context_parts.append(f"- {final}")
+
+     if rejected_examples:
+         context_parts.append("\nEXAMPLES OF POOR MEMORIES TO AVOID:")
+         for ex in rejected_examples[:3]:
+             context_parts.append(f"- {ex.get('initial_memory')}")
+
+     if context_parts:
+         context_parts.append("\nLearn from these examples to generate similar high-quality memories.")
+         return "\n".join(context_parts)
+
+     return ""
  def save_conversation_message(
      command_history: CommandHistory,
      conversation_id: str,
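
For reference, a sketch of what the new `format_memory_context` produces for a small input. The example dict is invented; only the `approved`/`rejected` keys and the `initial_memory`/`final_memory` fields come from the code above:

```python
examples = {
    "approved": [{"initial_memory": "likes short answers",
                  "final_memory": "User prefers short, direct answers"}],
    "rejected": [{"initial_memory": "said hello at 9am"}],
}
print(format_memory_context(examples))
# EXAMPLES OF GOOD MEMORIES:
# - User prefers short, direct answers
#
# EXAMPLES OF POOR MEMORIES TO AVOID:
# - said hello at 9am
#
# Learn from these examples to generate similar high-quality memories.
```
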