PyPI - projectdavid - Versions diffs - 1.31.0__py3-none-any.whl → 1.38.1__py3-none-any.whl - Mend

projectdavid 1.31.0__py3-none-any.whl → 1.38.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19)

projectdavid/clients/assistants_client.py +7 -13
projectdavid/clients/file_processor.py +102 -107
projectdavid/clients/messages_client.py +24 -39
projectdavid/clients/runs.py +156 -211
projectdavid/clients/synchronous_inference_wrapper.py +52 -24
projectdavid/clients/threads_client.py +32 -12
projectdavid/clients/vector_store_manager.py +110 -21
projectdavid/clients/vectors.py +47 -30
projectdavid/clients/vision-file_processor.py +462 -0
projectdavid/clients/vision_vectors.py +1058 -0
projectdavid/decorators.py +64 -0
projectdavid/entity.py +24 -5
projectdavid/synthesis/reranker.py +4 -2
projectdavid/utils/function_call_suppressor.py +40 -0
{projectdavid-1.31.0.dist-info → projectdavid-1.38.1.dist-info}/METADATA +6 -7
{projectdavid-1.31.0.dist-info → projectdavid-1.38.1.dist-info}/RECORD +19 -15
{projectdavid-1.31.0.dist-info → projectdavid-1.38.1.dist-info}/WHEEL +1 -1
{projectdavid-1.31.0.dist-info → projectdavid-1.38.1.dist-info}/licenses/LICENSE +0 -0
{projectdavid-1.31.0.dist-info → projectdavid-1.38.1.dist-info}/top_level.txt +0 -0

projectdavid/clients/assistants_client.py CHANGED Viewed

@@ -112,7 +112,6 @@ class AssistantsClient(BaseAPIClient):
         description: str = "",
         instructions: str = "",
         tools: Optional[List[Dict[str, Any]]] = None,
-        platform_tools: Optional[List[Dict[str, Any]]] = None,
         tool_resources: Optional[Dict[str, Dict[str, Any]]] = None,
         meta_data: Optional[Dict[str, Any]] = None,
         top_p: float = 1.0,
@@ -133,7 +132,6 @@ class AssistantsClient(BaseAPIClient):
             "model": model,
             "instructions": instructions,
             "tools": tools,
-            "platform_tools": platform_tools,
             "tool_resources": tool_resources,
             "meta_data": meta_data,
             "top_p": top_p,
@@ -263,14 +261,10 @@ class AssistantsClient(BaseAPIClient):
         )
         return {"message": "Assistant disassociated from user successfully"}
-    def list_assistants_by_user(
-        self, user_id: str
-    ) -> List[ent_validator.AssistantRead]:
-        logging_utility.info("Listing assistants for user id=%s", user_id)
-        try:
-            resp = self._request_with_retries("GET", f"/v1/users/{user_id}/assistants")
-            raw_list = self._parse_response(resp)
-            return [ent_validator.AssistantRead(**a) for a in raw_list]
-        except ValidationError as e:
-            logging_utility.error("Validation error: %s", e.json())
-            raise AssistantsClientError(f"Validation error: {e}") from e
+    def list(self) -> list[ent_validator.AssistantRead]:
+        """Return every assistant owned by *this* API key."""
+        logging_utility.info("Listing assistants")
+        resp = self._request_with_retries("GET", "/v1/assistants")
+        raw = self._parse_response(resp)
+        return [ent_validator.AssistantRead(**a) for a in raw]

projectdavid/clients/file_processor.py CHANGED Viewed

@@ -1,7 +1,6 @@
 import asyncio
 import csv
 import json
-import mimetypes
 import re
 import textwrap
 from concurrent.futures import ThreadPoolExecutor
@@ -10,16 +9,14 @@ from typing import Any, Dict, List, Tuple, Union
 try:  # Python 3.11+
     from typing import LiteralString
-except ImportError:  # 3.9 - 3.10
+except ImportError:  # 3.9–3.10
     from typing_extensions import LiteralString
-import magic
 import numpy as np
 import pdfplumber
 from docx import Document
 from pptx import Presentation
 from projectdavid_common import UtilsInterface
-from sentence_transformers import SentenceTransformer
 log = UtilsInterface.LoggingUtility()
@@ -29,17 +26,85 @@ class FileProcessor:
     #  Construction
     # ------------------------------------------------------------------ #
     def __init__(self, max_workers: int = 4, chunk_size: int = 512):
-        self.embedding_model = SentenceTransformer("paraphrase-MiniLM-L6-v2")
         self.embedding_model_name = "paraphrase-MiniLM-L6-v2"
+        self._embedding_model = None
         self._executor = ThreadPoolExecutor(max_workers=max_workers)
-        # token limits
-        self.max_seq_length = self.embedding_model.get_max_seq_length()
-        self.special_tokens_count = 2
-        self.effective_max_length = self.max_seq_length - self.special_tokens_count
-        self.chunk_size = min(chunk_size, self.effective_max_length * 4)
+        # Lazy-initialized attributes
+        self._requested_chunk_size = chunk_size
+        self._max_seq_length = None
+        self._effective_max_length = None
+        self._chunk_size = None
-        log.info("Initialized optimized FileProcessor")
+        log.info("Initialized Lazy-Loaded FileProcessor")
+    def _ensure_model(self):
+        """
+        Internal helper to load the model and calculate limits only once.
+        This prevents heavy imports (scipy, torch) until actually needed.
+        """
+        if self._embedding_model is None:
+            try:
+                from sentence_transformers import SentenceTransformer
+                log.info(f"Lazy-loading model: {self.embedding_model_name}")
+                self._embedding_model = SentenceTransformer(self.embedding_model_name)
+                # Ported Limit Calculations
+                self._max_seq_length = self._embedding_model.get_max_seq_length()
+                special_tokens_count = 2
+                self._effective_max_length = self._max_seq_length - special_tokens_count
+                self._chunk_size = min(
+                    self._requested_chunk_size, self._effective_max_length * 4
+                )
+            except ImportError:
+                log.error(
+                    "sentence-transformers not found. Ensure 'pip install projectdavid[vision]' is installed."
+                )
+                raise ImportError(
+                    "Model-based features require 'sentence-transformers'. Install with [vision] extra."
+                )
+        return self._embedding_model
+    # Properties to maintain access to derived attributes
+    @property
+    def chunk_size(self):
+        if self._chunk_size is None:
+            self._ensure_model()
+        return self._chunk_size
+    @property
+    def effective_max_length(self):
+        if self._effective_max_length is None:
+            self._ensure_model()
+        return self._effective_max_length
+    # ------------------------------------------------------------------ #
+    #  Embeddings
+    # ------------------------------------------------------------------ #
+    def encode_text(self, text: str):
+        model = self._ensure_model()
+        return model.encode(
+            [text],
+            convert_to_numpy=True,
+            truncate="model_max_length",
+            normalize_embeddings=True,
+        )[0]
+    async def _encode_chunk_async(self, chunk: str) -> np.ndarray:
+        model = self._ensure_model()
+        return await asyncio.get_event_loop().run_in_executor(
+            self._executor,
+            lambda: model.encode(
+                [chunk],
+                convert_to_numpy=True,
+                truncate="model_max_length",
+                normalize_embeddings=True,
+                show_progress_bar=False,
+            )[0],
+        )
     # ------------------------------------------------------------------ #
     #  Generic validators
@@ -54,54 +119,20 @@ class FileProcessor:
             raise ValueError(f"{file_path.name} > {mb} MB limit")
     # ------------------------------------------------------------------ #
-    #  File-type detection (extension + MIME)
+    #  File-type detection
     # ------------------------------------------------------------------ #
     def _detect_file_type(self, file_path: Path) -> str:
-        """
-        Return a handler tag:
-            • 'pdf'     • 'csv'
-            • 'json'    • 'office'
-            • 'text'
-        Raises *ValueError* on anything unknown.
-        """
-        # 1️⃣  Best-effort MIME sniff
-        mime_type: str | None = None
-        if magic is not None:
-            try:
-                mime_type = magic.from_file(str(file_path), mime=True)
-            except Exception:
-                mime_type = None
-        # 2️⃣  Fallback → mimetypes
-        if not mime_type:
-            mime_type, _ = mimetypes.guess_type(file_path.name)
         suffix = file_path.suffix.lower()
+        if suffix == ".pdf":
+            return "pdf"
+        if suffix == ".csv":
+            return "csv"
+        if suffix == ".json":
+            return "json"
+        if suffix in {".doc", ".docx", ".pptx"}:
+            return "office"
-        PDF_MIMES = {"application/pdf"}
-        CSV_MIMES = {"text/csv", "application/csv"}
-        JSON_MIMES = {"application/json"}
-        OFFICE_MIMES = {
-            "application/msword",
-            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
-            "application/vnd.openxmlformats-officedocument.presentationml.presentation",
-        }
-        TEXT_MIMES = {
-            "text/plain",
-            "text/markdown",
-            "text/x-python",
-            "text/x-c",
-            "text/x-c++",
-            "text/x-java-source",
-            "text/x-script.python",
-            "text/html",
-            "text/css",
-            "application/typescript",
-            "text/javascript",
-        }
-        TEXT_EXTS = {
+        text_exts = {
             ".txt",
             ".md",
             ".rst",
@@ -120,32 +151,9 @@ class FileProcessor:
             ".html",
             ".css",
         }
-        # --- PDF ---
-        if mime_type in PDF_MIMES or suffix == ".pdf":
-            return "pdf"
-        # --- CSV ---
-        if mime_type in CSV_MIMES or suffix == ".csv":
-            return "csv"
-        # --- JSON ---
-        if mime_type in JSON_MIMES or suffix == ".json":
-            return "json"
-        # --- Office documents ---
-        if mime_type in OFFICE_MIMES or suffix in {".doc", ".docx", ".pptx"}:
-            return "office"
-        # --- Generic text / code / markup ---
-        if mime_type in TEXT_MIMES or suffix in TEXT_EXTS:
+        if suffix in text_exts:
             return "text"
-        # --- Unsupported ---
-        raise ValueError(
-            f"Unsupported file type for '{file_path.name}': "
-            f"MIME={mime_type or 'unknown'}  extension={suffix}"
-        )
+        raise ValueError(f"Unsupported file type: {file_path.name} (ext={suffix})")
     # ------------------------------------------------------------------ #
     #  Public entry-point
@@ -156,19 +164,17 @@ class FileProcessor:
         self.validate_file(file_path)
         ftype = self._detect_file_type(file_path)
-        if ftype == "pdf":
-            return await self._process_pdf(file_path)
-        if ftype == "text":
-            return await self._process_text(file_path)
-        if ftype == "csv":
-            return await self._process_csv(file_path)
-        if ftype == "office":
-            return await self._process_office(file_path)
-        if ftype == "json":
-            return await self._process_json(file_path)
+        dispatch_map = {
+            "pdf": self._process_pdf,
+            "text": self._process_text,
+            "csv": self._process_csv,
+            "office": self._process_office,
+            "json": self._process_json,
+        }
+        if ftype not in dispatch_map:
+            raise ValueError(f"Unsupported file type: {file_path.suffix}")
-        # Safety net (should never hit)
-        raise ValueError(f"Unsupported file type: {file_path.suffix}")
+        return await dispatch_map[ftype](file_path)
     # ------------------------------------------------------------------ #
     #  PDF
@@ -239,7 +245,7 @@ class FileProcessor:
     async def _process_csv(
         self, file_path: Path, text_field: str = "description"
     ) -> Dict[str, Any]:
-        rows, texts, metas = [], [], []
+        texts, metas = [], []
         with file_path.open(newline="", encoding="utf-8") as f:
             reader = csv.DictReader(f)
             for row in reader:
@@ -369,18 +375,6 @@ class FileProcessor:
         pretty = json.dumps(obj, indent=2, ensure_ascii=False)
         return "\n".join(textwrap.wrap(pretty, width=120))
-    async def _encode_chunk_async(self, chunk: str) -> np.ndarray:
-        return await asyncio.get_event_loop().run_in_executor(
-            self._executor,
-            lambda: self.embedding_model.encode(
-                [chunk],
-                convert_to_numpy=True,
-                truncate="model_max_length",
-                normalize_embeddings=True,
-                show_progress_bar=False,
-            )[0],
-        )
     # ------------------------------------------------------------------ #
     #  Text chunking helpers
     # ------------------------------------------------------------------ #
@@ -405,10 +399,11 @@ class FileProcessor:
         return chunks
     def _split_oversized_chunk(self, chunk: str, tokens: List[str] = None) -> List[str]:
+        model = self._ensure_model()  # Ensure model is loaded to access tokenizer
         if tokens is None:
-            tokens = self.embedding_model.tokenizer.tokenize(chunk)
+            tokens = model.tokenizer.tokenize(chunk)
         out = []
         for i in range(0, len(tokens), self.effective_max_length):
             seg = tokens[i : i + self.effective_max_length]
-            out.append(self.embedding_model.tokenizer.convert_tokens_to_string(seg))
+            out.append(model.tokenizer.convert_tokens_to_string(seg))
         return out

projectdavid/clients/messages_client.py CHANGED Viewed

@@ -153,18 +153,21 @@ class MessagesClient(BaseAPIClient):
             raise
     def list_messages(
-        self, thread_id: str, limit: int = 20, order: str = "asc"
-    ) -> List[Dict[str, Any]]:
+        self,
+        thread_id: str,
+        limit: int = 20,
+        order: str = "asc",
+    ) -> ent_validator.MessagesList:
         """
-        List messages for a given thread.
+        Fetch messages for a thread and return an OpenAI-style envelope.
         Args:
-            thread_id (str): The thread ID.
-            limit (int): Maximum number of messages to retrieve.
-            order (str): Order of messages ('asc' or 'desc').
+            thread_id (str): Target thread ID.
+            limit (int): Max messages to fetch.
+            order (str): 'asc' or 'desc'.
         Returns:
-            List[Dict[str, Any]]: A list of messages as dictionaries.
+            MessagesList: Wrapper containing .data[], .first_id, .last_id, .has_more …
         """
         logging_utility.info(
             "Listing messages for thread_id: %s, limit: %d, order: %s",
@@ -178,24 +181,19 @@ class MessagesClient(BaseAPIClient):
                 f"/v1/threads/{thread_id}/messages", params=params
             )
             response.raise_for_status()
-            messages = response.json()
-            validated_messages = [
-                ent_validator.MessageRead(**message) for message in messages
-            ]
-            logging_utility.info("Retrieved %d messages", len(validated_messages))
-            return [message.dict() for message in validated_messages]
+            envelope = ent_validator.MessagesList(**response.json())
+            logging_utility.info("Retrieved %d messages", len(envelope.data))
+            return envelope
         except ValidationError as e:
             logging_utility.error("Validation error: %s", e.json())
-            raise ValueError(f"Validation error: {e}")
+            raise ValueError(f"Validation error: {e}") from e
         except httpx.HTTPStatusError as e:
-            logging_utility.error(
-                "HTTP error occurred while listing messages: %s", str(e)
-            )
+            logging_utility.error("HTTP error while listing messages: %s", str(e))
             raise
         except Exception as e:
-            logging_utility.error(
-                "An error occurred while listing messages: %s", str(e)
-            )
+            logging_utility.error("Unexpected error while listing messages: %s", str(e))
             raise
     def get_formatted_messages(
@@ -294,32 +292,19 @@ class MessagesClient(BaseAPIClient):
             logging_utility.error("An error occurred: %s", str(e))
             raise RuntimeError(f"An error occurred: {str(e)}")
-    def delete_message(self, message_id: str) -> Dict[str, Any]:
-        """
-        Delete a message by its ID.
-        Args:
-            message_id (str): The ID of the message.
-        Returns:
-            Dict[str, Any]: The deletion result.
-        """
+    def delete_message(self, message_id: str) -> ent_validator.MessageDeleted:
+        """Delete a message and return deletion envelope."""
         logging_utility.info("Deleting message with id: %s", message_id)
         try:
             response = self.client.delete(f"/v1/messages/{message_id}")
             response.raise_for_status()
-            result = response.json()
-            logging_utility.info("Message deleted successfully")
-            return result
+            return ent_validator.MessageDeleted(**response.json())
         except httpx.HTTPStatusError as e:
-            logging_utility.error(
-                "HTTP error occurred while deleting message: %s", str(e)
-            )
+            logging_utility.error("HTTP error while deleting message: %s", str(e))
             raise
         except Exception as e:
-            logging_utility.error(
-                "An error occurred while deleting message: %s", str(e)
-            )
+            logging_utility.error("Unexpected error while deleting message: %s", str(e))
             raise
     def save_assistant_message_chunk(

projectdavid 1.31.0__py3-none-any.whl → 1.38.1__py3-none-any.whl

projectdavid 1.31.0__py3-none-any.whl → 1.38.1__py3-none-any.whl