PyPI - openvector_dev - Versions diffs - 0.1.8__tar.gz → 0.1.10__tar.gz - Mend

openvector_dev 0.1.8.tar.gz → 0.1.10.tar.gz

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (17) hide show

{openvector_dev-0.1.8 → openvector_dev-0.1.10}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: openvector_dev
-Version: 0.1.8
+Version: 0.1.10
 Summary:
 Author: p00ler
 Author-email: liveitspain@gmail.com

{openvector_dev-0.1.8 → openvector_dev-0.1.10}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "openvector_dev"
-version = "0.1.8"
+version = "0.1.10"
 description = ""
 authors = [
     {name = "p00ler",email = "liveitspain@gmail.com"}

{openvector_dev-0.1.8 → openvector_dev-0.1.10}/src/lein_vector/api/facade.py RENAMED Viewed

@@ -1,13 +1,13 @@
 from collections.abc import Sequence
 from datetime import UTC, datetime
-from uuid import uuid4
+from typing import Optional, Any
+from uuid import UUID, uuid4
 import redis.asyncio as aioredis
 from lein_vector import MemoryManagerQdrant, QdrantAdapter, RedisShortTermMemory
 from lein_vector.schemas.chunk import Chunk, ChunkPayload
 from lein_vector.sentence_transformer import EmbeddingProviderGemini
-import warnings
 class Memory:
@@ -115,12 +115,15 @@ class Memory:
                 extra={"msg_no": curr_no},
             )
             await self.long.upsert_chunk_with_vector(new_chunk, vector)
-            # self.short.clear_until(block_size)
-        if curr_no % 40 == 0:
-            await self.long.merge_old_chunks(user_id, "type0", n=self.merge_n)
+        if curr_no % (block_size * self.merge_n) == 0:
+            return await self.long.merge_old_chunks(user_id=user_id, bot=bot, chunk_type="type0", n=self.merge_n)
+        else:
+            return None, None
-    async def get_long_memories(self, user_id: int, bot: str, search_terms: list[str], topk: int = 3) -> list:
+    async def get_long_memories(
+        self, user_id: int, bot: str, search_terms: list[str], topk: int = 3
+    ) -> list:
         """
         Возвращает ТОЛЬКО длительную память по списку тем.
         :param bot: Кодовое имя бота
@@ -139,7 +142,9 @@ class Memory:
             filter_={"chunk_type": "type1"},
         )
-    async def get_short_memories(self, user_id: int, bot: str, n_memories: int = 20) -> list:
+    async def get_short_memories(
+        self, user_id: int, bot: str, n_memories: int = 20
+    ) -> list:
         """
         Возвращает ТОЛЬКО кратковременную память по списку тем
         :param bot: Кодовое имя бота
@@ -150,7 +155,9 @@ class Memory:
         data = await self.short.window(user_id, bot, n_memories)
         return self._to_openai(data)
-    async def add_short_msg(self, user_id: int, bot: str, text: str, *, role: str = "user"):
+    async def add_short_msg(
+        self, user_id: int, bot: str, text: str, *, role: str = "user"
+    ):
         """
         Добавляет в "короткую память" новое сообщение
         :param user_id: Идентификатор пользователя
@@ -162,12 +169,60 @@ class Memory:
         """
         await self.short.add(bot=bot, user_id=user_id, role=role, text=text)
-    async def delete_memory(self, user_id: int) -> None:
+    async def add_summary_chunk(
+        self,
+        user_id: int,
+        bot: str,
+        text: str,
+        old_chunks: list[UUID],
+        *,
+        chunk_type: str = "type1",
+    ) -> None:
+        """
+        Ручное добавление summary-чанка:
+        1) upsert нового чанка-саммари;
+        2) удаление использованных чанков.
+        :param user_id:     id пользователя
+        :param bot:         кодовое имя бота
+        :param text:        текст саммари
+        :param old_chunks:  список UUID чанков, вошедших в саммари
+        :param chunk_type:  тип нового чанка (по умолчанию 'type1')
+        """
+        # Эмбеддинг для саммари
+        embedding = await self.embed.get_embedding(text)
+        # Создаём новый Chunk
+        summary_chunk = Chunk(
+            chunk_id=uuid4(),
+            bot=bot,
+            user_id=user_id,
+            chunk_type=chunk_type,
+            created_at=datetime.now(UTC),
+            last_hit=datetime.now(UTC),
+            hit_count=0,
+            text=text,
+            persistent=False,
+            summary_of=old_chunks,
+        )
+        # Записываем в Qdrant
+        await self.long.upsert_chunk_with_vector(
+            user_id=user_id,
+            chunk=summary_chunk,
+            embedding=embedding,
+        )
+        # Удаляем исходные чанки
+        if old_chunks:
+            await self.long.delete_chunks(user_id, old_chunks)
+    async def delete_memory(self, user_id: int, bot: str) -> None:
         """
         Удаляет
         :param user_id: Идентификатор пользователя.
+        :param bot: Codename бота
         """
-        await self.long.delete_all(user_id)
+        await self.long.delete_all(user_id, bot)
     @staticmethod
     def _chunk_texts(chunks: Sequence[Chunk | ChunkPayload]) -> list[str]:

{openvector_dev-0.1.8 → openvector_dev-0.1.10}/src/lein_vector/bases/memory_manager_abc.py RENAMED Viewed

@@ -1,9 +1,13 @@
 from uuid import UUID
 from lein_vector.schemas.chunk import Chunk
 class MemoryManagerABC:
     async def upsert_chunk(self, user_id: int, bot: str, chunk: Chunk) -> None: ...
-    async def upsert_chunks(self, user_id: int, bot: str, chunks: list[Chunk]) -> None: ...
+    async def upsert_chunks(
+        self, user_id: int, bot: str, chunks: list[Chunk]
+    ) -> None: ...
     async def retrieve_by_embedding(
         self, user_id: int, bot: str, embedding: list[float], topk: int = 3
     ) -> list[Chunk]: ...
@@ -18,14 +22,20 @@ class MemoryManagerABC:
     async def retrieve_by_text(
         self, user_id: int, bot: str, query: str, topk: int = 3
     ) -> list[Chunk]: ...
-    async def merge_old_chunks(self, user_id: int, bot: str, chunk_type: str) -> None: ...
+    async def merge_old_chunks(
+        self, user_id: int, bot: str, chunk_type: str
+    ) -> None: ...
     async def archive_user(self, user_id: int, bot: str) -> None: ...
     async def restore_user(self, user_id: int, bot: str) -> None: ...
     async def increment_hit(self, user_id: int, bot: str, chunk_id: UUID) -> None: ...
     async def pop_first_n(
         self, user_id: int, bot: str, chunk_type: str, n: int
     ) -> list[Chunk]: ...
-    async def delete_oldest_nonpersistent(self, user_id: int, bot: str, keep: int) -> None: ...
+    async def delete_oldest_nonpersistent(
+        self, user_id: int, bot: str, keep: int
+    ) -> None: ...
     async def delete_chunk(self, user_id: int, bot: str, chunk_id: UUID) -> None: ...
-    async def delete_chunks(self, user_id: int, bot: str, chunk_ids: list[UUID]) -> None: ...
+    async def delete_chunks(
+        self, user_id: int, bot: str, chunk_ids: list[UUID]
+    ) -> None: ...
     async def delete_all(self, user_id: int, bot: str) -> None: ...

{openvector_dev-0.1.8 → openvector_dev-0.1.10}/src/lein_vector/memory_manager_qdrant.py RENAMED Viewed

@@ -1,6 +1,6 @@
 import asyncio
 from datetime import UTC, datetime
-from typing import Any
+from typing import Any, Tuple, List
 from uuid import UUID
 from lein_vector.bases.memory_manager_abc import MemoryManagerABC
@@ -77,13 +77,11 @@ class MemoryManagerQdrant(MemoryManagerABC):
             )
         async def _one(e):
-            return await self.qdrant.search(
-                e, q_filter, topk, score_threshold
-            )
+            return await self.qdrant.search(e, q_filter, topk, score_threshold)
         return await asyncio.gather(*[_one(e) for e in embeddings])
-    #ToDO: filter
+    # ToDO: filter
     async def retrieve_by_type(
         self, user_id: int, bot: str, chunk_type: str, topk: int = 3
     ) -> list[ChunkPayload]:
@@ -91,30 +89,34 @@ class MemoryManagerQdrant(MemoryManagerABC):
         filter_ = {"user_id": user_id, "bot": bot, "chunk_type": chunk_type}
         return await self.qdrant.get_all_chunks_with_filter(filter_)
-    async def merge_old_chunks(self, user_id: int, bot: str, chunk_type: str, n: int = 5) -> None:
+    async def merge_old_chunks(
+            self,
+            user_id: int,
+            bot: str,
+            chunk_type: str,
+            n: int = 5,
+    ) -> Tuple[str | None, List[UUID]]:
+        """
+        Собрать n старых чанков и вернуть:
+          • merged_text — склеенные сообщения c ролями,
+          • used_ids    — UUID этих чанков.
+        Если чанков меньше n — вернётся (None, []).
+        """
         chunks = await self.qdrant.get_n_oldest_chunks(user_id, bot, chunk_type, n)
         if len(chunks) < n:
-            return
-        merged_text = " | ".join([c.text for c in chunks])
-        from datetime import datetime
-        from uuid import uuid4
-        # ToDo: LLM summary
-        summary_chunk = Chunk(
-            chunk_id=uuid4(),
-            user_id=user_id,
-            bot=bot,
-            chunk_type=self._next_type(chunk_type),
-            created_at=datetime.now(UTC),
-            last_hit=datetime.now(UTC),
-            hit_count=0,
-            text=merged_text,
-            persistent=False,
-            summary_of=[c.chunk_id for c in chunks],
-        )
-        await self.upsert_chunk(user_id, bot, summary_chunk)
+            return None, []
+        def _ensure_role(txt: str, default_role: str = "gf") -> str:
+            # если строка уже начинается с 'role: ', оставляем как есть
+            if txt.split(":", 1)[0] in {"user", "gf", "assistant"}:
+                return txt
+            return f"{default_role}: {txt}"
-        await self.delete_chunks(user_id, bot, [c.chunk_id for c in chunks])
+        merged_text = "\n".join(_ensure_role(c.text) for c in chunks)
+        used_ids = [c.chunk_id for c in chunks]
+        return merged_text, used_ids
     async def archive_user(self, user_id: int, bot: str) -> None:
         all_chunks = await self.qdrant.get_all_chunks(user_id, bot)
@@ -134,7 +136,9 @@ class MemoryManagerQdrant(MemoryManagerABC):
     async def delete_chunk(self, user_id: int, bot: str, chunk_id: UUID) -> None:
         await self.qdrant.delete(chunk_id)
-    async def delete_chunks(self, user_id: int, bot: str, chunk_ids: list[UUID]) -> None:
+    async def delete_chunks(
+        self, user_id: int, bot: str, chunk_ids: list[UUID]
+    ) -> None:
         await self.qdrant.delete_batch(chunk_ids)
     async def delete_all(self, user_id: int, bot: str) -> None:

{openvector_dev-0.1.8 → openvector_dev-0.1.10}/src/lein_vector/qdrant_adapter.py RENAMED Viewed

@@ -109,13 +109,21 @@ class QdrantAdapter:
     async def delete_collection(self) -> None:
         await self.client.delete_collection(collection_name=self.collection)
-    async def get_all_chunks(self, user_id: int) -> list[ChunkPayload]:
+    async def get_all_chunks(self, user_id: int, bot: str) -> list[ChunkPayload]:
+        """
+        Вернуть ВСЕ чанки заданного пользователя и конкретного бота.
+        """
         q_filter = Filter(
-            must=[FieldCondition(key="user_id", match=MatchValue(value=user_id))]
+            must=[
+                FieldCondition(key="user_id", match=MatchValue(value=user_id)),
+                FieldCondition(key="bot",      match=MatchValue(value=bot)),
+            ]
         )
         scroll = await self.client.scroll(
             collection_name=self.collection,
             scroll_filter=q_filter,
             limit=2048,
         )
         return [ChunkPayload(**p.payload) for p in scroll[0]]

{openvector_dev-0.1.8 → openvector_dev-0.1.10}/src/lein_vector/schemas/chunk.py RENAMED Viewed

@@ -36,7 +36,7 @@ class ChunkPayload(BaseModel):
     source_chunk_id: UUID | None = None
     extra: dict | None = Field(default_factory=dict)
-    def to_chunk(self, last_hit: datetime = None, hit_count: int = 0) -> Chunk:
+    def to_chunk(self, last_hit: datetime | None = None, hit_count: int = 0) -> Chunk:
         return Chunk(
             **self.model_dump(),
             last_hit=last_hit or datetime.now(UTC),

{openvector_dev-0.1.8 → openvector_dev-0.1.10}/src/lein_vector/sentence_transformer.py RENAMED Viewed

@@ -22,6 +22,7 @@ class EmbeddingProviderGemini(EmbeddingProviderABC):
     async def get_embedding(self, text: str) -> list[float]:
         # В Gemini SDK обычно нет async, значит — обёртка через run_in_executor:
         import asyncio
         if not isinstance(text, str):
             return
         loop = asyncio.get_running_loop()