AstrBot: astrbot-4.3.5-py3-none-any.whl → astrbot-4.5.0-py3-none-any.whl
- astrbot/core/agent/runners/tool_loop_agent_runner.py +31 -2
- astrbot/core/astrbot_config_mgr.py +23 -51
- astrbot/core/config/default.py +92 -12
- astrbot/core/conversation_mgr.py +36 -1
- astrbot/core/core_lifecycle.py +24 -5
- astrbot/core/db/migration/migra_45_to_46.py +44 -0
- astrbot/core/db/vec_db/base.py +33 -2
- astrbot/core/db/vec_db/faiss_impl/document_storage.py +310 -52
- astrbot/core/db/vec_db/faiss_impl/embedding_storage.py +31 -3
- astrbot/core/db/vec_db/faiss_impl/vec_db.py +81 -23
- astrbot/core/file_token_service.py +6 -1
- astrbot/core/initial_loader.py +6 -3
- astrbot/core/knowledge_base/chunking/__init__.py +11 -0
- astrbot/core/knowledge_base/chunking/base.py +24 -0
- astrbot/core/knowledge_base/chunking/fixed_size.py +57 -0
- astrbot/core/knowledge_base/chunking/recursive.py +155 -0
- astrbot/core/knowledge_base/kb_db_sqlite.py +299 -0
- astrbot/core/knowledge_base/kb_helper.py +348 -0
- astrbot/core/knowledge_base/kb_mgr.py +287 -0
- astrbot/core/knowledge_base/models.py +114 -0
- astrbot/core/knowledge_base/parsers/__init__.py +15 -0
- astrbot/core/knowledge_base/parsers/base.py +50 -0
- astrbot/core/knowledge_base/parsers/markitdown_parser.py +25 -0
- astrbot/core/knowledge_base/parsers/pdf_parser.py +100 -0
- astrbot/core/knowledge_base/parsers/text_parser.py +41 -0
- astrbot/core/knowledge_base/parsers/util.py +13 -0
- astrbot/core/knowledge_base/retrieval/__init__.py +16 -0
- astrbot/core/knowledge_base/retrieval/hit_stopwords.txt +767 -0
- astrbot/core/knowledge_base/retrieval/manager.py +273 -0
- astrbot/core/knowledge_base/retrieval/rank_fusion.py +138 -0
- astrbot/core/knowledge_base/retrieval/sparse_retriever.py +130 -0
- astrbot/core/pipeline/process_stage/method/llm_request.py +29 -7
- astrbot/core/pipeline/process_stage/utils.py +80 -0
- astrbot/core/platform/astr_message_event.py +8 -7
- astrbot/core/platform/sources/misskey/misskey_adapter.py +380 -44
- astrbot/core/platform/sources/misskey/misskey_api.py +581 -45
- astrbot/core/platform/sources/misskey/misskey_event.py +76 -41
- astrbot/core/platform/sources/misskey/misskey_utils.py +254 -43
- astrbot/core/platform/sources/qqofficial_webhook/qo_webhook_server.py +2 -1
- astrbot/core/platform/sources/satori/satori_adapter.py +27 -1
- astrbot/core/platform/sources/satori/satori_event.py +270 -99
- astrbot/core/provider/manager.py +14 -9
- astrbot/core/provider/provider.py +67 -0
- astrbot/core/provider/sources/anthropic_source.py +4 -4
- astrbot/core/provider/sources/dashscope_source.py +10 -9
- astrbot/core/provider/sources/dify_source.py +6 -8
- astrbot/core/provider/sources/gemini_embedding_source.py +1 -2
- astrbot/core/provider/sources/openai_embedding_source.py +1 -2
- astrbot/core/provider/sources/openai_source.py +18 -15
- astrbot/core/provider/sources/openai_tts_api_source.py +1 -1
- astrbot/core/star/context.py +3 -0
- astrbot/core/star/star.py +6 -0
- astrbot/core/star/star_manager.py +13 -7
- astrbot/core/umop_config_router.py +81 -0
- astrbot/core/updator.py +1 -1
- astrbot/core/utils/io.py +23 -12
- astrbot/dashboard/routes/__init__.py +2 -0
- astrbot/dashboard/routes/config.py +137 -9
- astrbot/dashboard/routes/knowledge_base.py +1065 -0
- astrbot/dashboard/routes/plugin.py +24 -5
- astrbot/dashboard/routes/update.py +1 -1
- astrbot/dashboard/server.py +6 -0
- astrbot/dashboard/utils.py +161 -0
- {astrbot-4.3.5.dist-info → astrbot-4.5.0.dist-info}/METADATA +29 -13
- {astrbot-4.3.5.dist-info → astrbot-4.5.0.dist-info}/RECORD +68 -44
- {astrbot-4.3.5.dist-info → astrbot-4.5.0.dist-info}/WHEEL +0 -0
- {astrbot-4.3.5.dist-info → astrbot-4.5.0.dist-info}/entry_points.txt +0 -0
- {astrbot-4.3.5.dist-info → astrbot-4.5.0.dist-info}/licenses/LICENSE +0 -0
astrbot/core/db/vec_db/base.py
CHANGED
@@ -16,14 +16,42 @@ class BaseVecDB:
         pass

     @abc.abstractmethod
-    async def insert(
+    async def insert(
+        self, content: str, metadata: dict | None = None, id: str | None = None
+    ) -> int:
         """
         插入一条文本和其对应向量,自动生成 ID 并保持一致性。
         """
         ...

     @abc.abstractmethod
-    async def
+    async def insert_batch(
+        self,
+        contents: list[str],
+        metadatas: list[dict] | None = None,
+        ids: list[str] | None = None,
+        batch_size: int = 32,
+        tasks_limit: int = 3,
+        max_retries: int = 3,
+        progress_callback=None,
+    ) -> int:
+        """
+        批量插入文本和其对应向量,自动生成 ID 并保持一致性。
+
+        Args:
+            progress_callback: 进度回调函数,接收参数 (current, total)
+        """
+        ...
+
+    @abc.abstractmethod
+    async def retrieve(
+        self,
+        query: str,
+        top_k: int = 5,
+        fetch_k: int = 20,
+        rerank: bool = False,
+        metadata_filters: dict | None = None,
+    ) -> list[Result]:
         """
         搜索最相似的文档。
         Args:
@@ -44,3 +72,6 @@ class BaseVecDB:
             bool: 删除是否成功
         """
         ...
+
+    @abc.abstractmethod
+    async def close(self): ...
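The abstract surface of `BaseVecDB` grows here: `insert` gains optional `metadata`/`id` arguments, and `insert_batch`, a fully parameterized `retrieve`, and `close` become part of the contract every backend must implement. A minimal usage sketch of the new batch contract (the `db` object, `chunks` list, and `report_progress` helper are hypothetical; the `(current, total)` callback shape follows the docstring above):

```python
def report_progress(current: int, total: int) -> None:
    # (current, total) matches the progress_callback contract documented above
    print(f"embedded {current}/{total} chunks")


async def ingest(db, chunks: list[str]):
    # db: any concrete BaseVecDB implementation (e.g. FaissVecDB below)
    await db.insert_batch(
        chunks,
        metadatas=[{"kb_doc_id": "demo-doc"} for _ in chunks],
        batch_size=32,        # chunks embedded per provider call
        tasks_limit=3,        # concurrent embedding requests
        max_retries=3,        # retries on transient provider failures
        progress_callback=report_progress,
    )
    hits = await db.retrieve("some query", top_k=5, fetch_k=20)
    await db.close()
    return hits
```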
astrbot/core/db/vec_db/faiss_impl/document_storage.py
CHANGED
@@ -1,59 +1,224 @@
-import aiosqlite
 import os
+import json
+from datetime import datetime
+from contextlib import asynccontextmanager
+
+from sqlalchemy import Text, Column
+from sqlalchemy.ext.asyncio import AsyncEngine, AsyncSession, create_async_engine
+from sqlalchemy.orm import sessionmaker
+from sqlmodel import Field, SQLModel, select, col, func, text, MetaData
+from astrbot.core import logger
+
+
+class BaseDocModel(SQLModel, table=False):
+    metadata = MetaData()
+
+
+class Document(BaseDocModel, table=True):
+    """SQLModel for documents table."""
+
+    __tablename__ = "documents"  # type: ignore
+
+    id: int | None = Field(
+        default=None, primary_key=True, sa_column_kwargs={"autoincrement": True}
+    )
+    doc_id: str = Field(nullable=False)
+    text: str = Field(nullable=False)
+    metadata_: str | None = Field(default=None, sa_column=Column("metadata", Text))
+    created_at: datetime | None = Field(default=None)
+    updated_at: datetime | None = Field(default=None)


 class DocumentStorage:
     def __init__(self, db_path: str):
         self.db_path = db_path
-        self.
+        self.DATABASE_URL = f"sqlite+aiosqlite:///{db_path}"
+        self.engine: AsyncEngine | None = None
+        self.async_session_maker: sessionmaker | None = None
         self.sqlite_init_path = os.path.join(
             os.path.dirname(__file__), "sqlite_init.sql"
         )

     async def initialize(self):
         """Initialize the SQLite database and create the documents table if it doesn't exist."""
-
-
-
-
-
-
-
-
-
+        await self.connect()
+        async with self.engine.begin() as conn:  # type: ignore
+            # Create tables using SQLModel
+            await conn.run_sync(BaseDocModel.metadata.create_all)
+
+            try:
+                await conn.execute(
+                    text(
+                        "ALTER TABLE documents ADD COLUMN kb_doc_id TEXT "
+                        "GENERATED ALWAYS AS (json_extract(metadata, '$.kb_doc_id')) STORED"
+                    )
+                )
+                await conn.execute(
+                    text(
+                        "ALTER TABLE documents ADD COLUMN user_id TEXT "
+                        "GENERATED ALWAYS AS (json_extract(metadata, '$.user_id')) STORED"
+                    )
+                )
+
+                # Create indexes
+                await conn.execute(
+                    text(
+                        "CREATE INDEX IF NOT EXISTS idx_documents_kb_doc_id ON documents(kb_doc_id)"
+                    )
+                )
+                await conn.execute(
+                    text(
+                        "CREATE INDEX IF NOT EXISTS idx_documents_user_id ON documents(user_id)"
+                    )
+                )
+            except BaseException:
+                pass
+
+            await conn.commit()

     async def connect(self):
         """Connect to the SQLite database."""
-        self.
+        if self.engine is None:
+            self.engine = create_async_engine(
+                self.DATABASE_URL,
+                echo=False,
+                future=True,
+            )
+            self.async_session_maker = sessionmaker(
+                self.engine,  # type: ignore
+                class_=AsyncSession,
+                expire_on_commit=False,
+            )  # type: ignore
+
+    @asynccontextmanager
+    async def get_session(self):
+        """Context manager for database sessions."""
+        async with self.async_session_maker() as session:  # type: ignore
+            yield session

-    async def get_documents(
+    async def get_documents(
+        self,
+        metadata_filters: dict,
+        ids: list | None = None,
+        offset: int | None = 0,
+        limit: int | None = 100,
+    ) -> list[dict]:
         """Retrieve documents by metadata filters and ids.

         Args:
             metadata_filters (dict): The metadata filters to apply.
+            ids (list | None): Optional list of document IDs to filter.
+            offset (int | None): Offset for pagination.
+            limit (int | None): Limit for pagination.
+
+        Returns:
+            list: The list of documents that match the filters.
+        """
+        if self.engine is None:
+            logger.warning(
+                "Database connection is not initialized, returning empty result"
+            )
+            return []
+
+        async with self.get_session() as session:
+            query = select(Document)
+
+            for key, val in metadata_filters.items():
+                query = query.where(
+                    text(f"json_extract(metadata, '$.{key}') = :filter_{key}")
+                ).params(**{f"filter_{key}": val})
+
+            if ids is not None and len(ids) > 0:
+                valid_ids = [int(i) for i in ids if i != -1]
+                if valid_ids:
+                    query = query.where(col(Document.id).in_(valid_ids))
+
+            if limit is not None:
+                query = query.limit(limit)
+            if offset is not None:
+                query = query.offset(offset)
+
+            result = await session.execute(query)
+            documents = result.scalars().all()
+
+            return [self._document_to_dict(doc) for doc in documents]
+
+    async def insert_document(self, doc_id: str, text: str, metadata: dict) -> int:
+        """Insert a single document and return its integer ID.
+
+        Args:
+            doc_id (str): The document ID (UUID string).
+            text (str): The document text.
+            metadata (dict): The document metadata.

         Returns:
-
+            int: The integer ID of the inserted document.
+        """
+        assert self.engine is not None, "Database connection is not initialized."
+
+        async with self.get_session() as session:
+            async with session.begin():
+                document = Document(
+                    doc_id=doc_id,
+                    text=text,
+                    metadata_=json.dumps(metadata),
+                    created_at=datetime.now(),
+                    updated_at=datetime.now(),
+                )
+                session.add(document)
+                await session.flush()  # Flush to get the ID
+                return document.id  # type: ignore
+
+    async def insert_documents_batch(
+        self, doc_ids: list[str], texts: list[str], metadatas: list[dict]
+    ) -> list[int]:
+        """Batch insert documents and return their integer IDs.
+
+        Args:
+            doc_ids (list[str]): List of document IDs (UUID strings).
+            texts (list[str]): List of document texts.
+            metadatas (list[dict]): List of document metadata.
+
+        Returns:
+            list[int]: List of integer IDs of the inserted documents.
+        """
+        assert self.engine is not None, "Database connection is not initialized."
+
+        async with self.get_session() as session:
+            async with session.begin():
+                import json
+
+                documents = []
+                for doc_id, text, metadata in zip(doc_ids, texts, metadatas):
+                    document = Document(
+                        doc_id=doc_id,
+                        text=text,
+                        metadata_=json.dumps(metadata),
+                        created_at=datetime.now(),
+                        updated_at=datetime.now(),
+                    )
+                    documents.append(document)
+                    session.add(document)
+
+                await session.flush()  # Flush to get all IDs
+                return [doc.id for doc in documents]  # type: ignore
+
+    async def delete_document_by_doc_id(self, doc_id: str):
+        """Delete a document by its doc_id.
+
+        Args:
+            doc_id (str): The doc_id of the document to delete.
         """
-
-
-
-
-
-
-
-
-
-
-        where_sql = " AND ".join(where_clauses) or "1=1"
-
-        result = []
-        async with self.connection.cursor() as cursor:
-            sql = "SELECT * FROM documents WHERE " + where_sql
-            await cursor.execute(sql, values)
-            for row in await cursor.fetchall():
-                result.append(await self.tuple_to_dict(row))
-        return result
+        assert self.engine is not None, "Database connection is not initialized."
+
+        async with self.get_session() as session:
+            async with session.begin():
+                query = select(Document).where(col(Document.doc_id) == doc_id)
+                result = await session.execute(query)
+                document = result.scalar_one_or_none()
+
+                if document:
+                    await session.delete(document)

     async def get_document_by_doc_id(self, doc_id: str):
         """Retrieve a document by its doc_id.
@@ -62,28 +227,91 @@ class DocumentStorage:
             doc_id (str): The doc_id of the document to retrieve.

         Returns:
-            dict: The document data.
+            dict: The document data or None if not found.
         """
-
-
-
-
-
-
-
+        assert self.engine is not None, "Database connection is not initialized."
+
+        async with self.get_session() as session:
+            query = select(Document).where(col(Document.doc_id) == doc_id)
+            result = await session.execute(query)
+            document = result.scalar_one_or_none()
+
+            if document:
+                return self._document_to_dict(document)
+            return None

     async def update_document_by_doc_id(self, doc_id: str, new_text: str):
-        """
+        """Update a document by its doc_id.

         Args:
             doc_id (str): The doc_id.
             new_text (str): The new text to update the document with.
         """
-
-
-
+        assert self.engine is not None, "Database connection is not initialized."
+
+        async with self.get_session() as session:
+            async with session.begin():
+                query = select(Document).where(col(Document.doc_id) == doc_id)
+                result = await session.execute(query)
+                document = result.scalar_one_or_none()
+
+                if document:
+                    document.text = new_text
+                    document.updated_at = datetime.now()
+                    session.add(document)
+
+    async def delete_documents(self, metadata_filters: dict):
+        """Delete documents by their metadata filters.
+
+        Args:
+            metadata_filters (dict): The metadata filters to apply.
+        """
+        if self.engine is None:
+            logger.warning(
+                "Database connection is not initialized, skipping delete operation"
             )
-
+            return
+
+        async with self.get_session() as session:
+            async with session.begin():
+                query = select(Document)
+
+                for key, val in metadata_filters.items():
+                    query = query.where(
+                        text(f"json_extract(metadata, '$.{key}') = :filter_{key}")
+                    ).params(**{f"filter_{key}": val})
+
+                result = await session.execute(query)
+                documents = result.scalars().all()
+
+                for doc in documents:
+                    await session.delete(doc)
+
+    async def count_documents(self, metadata_filters: dict | None = None) -> int:
+        """Count documents in the database.
+
+        Args:
+            metadata_filters (dict | None): Metadata filters to apply.
+
+        Returns:
+            int: The count of documents.
+        """
+        if self.engine is None:
+            logger.warning("Database connection is not initialized, returning 0")
+            return 0
+
+        async with self.get_session() as session:
+            query = select(func.count(col(Document.id)))
+
+            if metadata_filters:
+                for key, val in metadata_filters.items():
+                    query = query.where(
+                        text(f"json_extract(metadata, '$.{key}') = :filter_{key}")
+                    ).params(**{f"filter_{key}": val})
+
+            result = await session.execute(query)
+            count = result.scalar_one_or_none()
+            return count if count is not None else 0

     async def get_user_ids(self) -> list[str]:
         """Retrieve all user IDs from the documents table.
@@ -91,11 +319,38 @@ class DocumentStorage:
         Returns:
             list: A list of user IDs.
         """
-
-
-
+        assert self.engine is not None, "Database connection is not initialized."
+
+        async with self.get_session() as session:
+            query = text(
+                "SELECT DISTINCT user_id FROM documents WHERE user_id IS NOT NULL"
+            )
+            result = await session.execute(query)
+            rows = result.fetchall()
         return [row[0] for row in rows]

+    def _document_to_dict(self, document: Document) -> dict:
+        """Convert a Document model to a dictionary.
+
+        Args:
+            document (Document): The document to convert.
+
+        Returns:
+            dict: The converted dictionary.
+        """
+        return {
+            "id": document.id,
+            "doc_id": document.doc_id,
+            "text": document.text,
+            "metadata": document.metadata_,
+            "created_at": document.created_at.isoformat()
+            if isinstance(document.created_at, datetime)
+            else document.created_at,
+            "updated_at": document.updated_at.isoformat()
+            if isinstance(document.updated_at, datetime)
+            else document.updated_at,
+        }
+
     async def tuple_to_dict(self, row):
         """Convert a tuple to a dictionary.

@@ -104,6 +359,8 @@ class DocumentStorage:

         Returns:
             dict: The converted dictionary.
+
+        Note: This method is kept for backward compatibility but is no longer used internally.
         """
         return {
             "id": row[0],
@@ -116,6 +373,7 @@ class DocumentStorage:

     async def close(self):
         """Close the connection to the SQLite database."""
-        if self.
-            await self.
-        self.
+        if self.engine:
+            await self.engine.dispose()
+            self.engine = None
+            self.async_session_maker = None
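Two things stand out in this rewrite. First, the storage layer moves from hand-written aiosqlite SQL to SQLModel/SQLAlchemy sessions. Second, `initialize()` surfaces the JSON metadata fields `kb_doc_id` and `user_id` as generated columns so that filters on them can use ordinary B-tree indexes instead of evaluating `json_extract` on every row (the broad `except BaseException: pass` swallows the error when the columns already exist). A standalone sketch of the generated-column technique, with made-up table and field names rather than AstrBot's:

```python
import sqlite3

conn = sqlite3.connect(":memory:")  # generated columns need SQLite 3.31+
conn.executescript("""
CREATE TABLE docs (
    id INTEGER PRIMARY KEY,
    metadata TEXT,
    -- derived from the JSON blob, kept in sync automatically
    user_id TEXT GENERATED ALWAYS AS (json_extract(metadata, '$.user_id')) STORED
);
CREATE INDEX idx_docs_user_id ON docs(user_id);
""")
conn.execute("INSERT INTO docs (metadata) VALUES (?)", ('{"user_id": "u1"}',))

# this filter can now be served by idx_docs_user_id instead of scanning JSON
rows = conn.execute("SELECT id FROM docs WHERE user_id = ?", ("u1",)).fetchall()
print(rows)  # [(1,)]
```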
astrbot/core/db/vec_db/faiss_impl/embedding_storage.py
CHANGED
@@ -9,7 +9,7 @@ import numpy as np


 class EmbeddingStorage:
-    def __init__(self, dimension: int, path: str = None):
+    def __init__(self, dimension: int, path: str | None = None):
         self.dimension = dimension
         self.path = path
         self.index = None
@@ -18,7 +18,6 @@ class EmbeddingStorage:
         else:
             base_index = faiss.IndexFlatL2(dimension)
             self.index = faiss.IndexIDMap(base_index)
-        self.storage = {}

     async def insert(self, vector: np.ndarray, id: int):
         """插入向量
@@ -29,12 +28,29 @@ class EmbeddingStorage:
         Raises:
             ValueError: 如果向量的维度与存储的维度不匹配
         """
+        assert self.index is not None, "FAISS index is not initialized."
         if vector.shape[0] != self.dimension:
             raise ValueError(
                 f"向量维度不匹配, 期望: {self.dimension}, 实际: {vector.shape[0]}"
             )
         self.index.add_with_ids(vector.reshape(1, -1), np.array([id]))
-        self.
+        await self.save_index()
+
+    async def insert_batch(self, vectors: np.ndarray, ids: list[int]):
+        """批量插入向量
+
+        Args:
+            vectors (np.ndarray): 要插入的向量数组
+            ids (list[int]): 向量的ID列表
+        Raises:
+            ValueError: 如果向量的维度与存储的维度不匹配
+        """
+        assert self.index is not None, "FAISS index is not initialized."
+        if vectors.shape[1] != self.dimension:
+            raise ValueError(
+                f"向量维度不匹配, 期望: {self.dimension}, 实际: {vectors.shape[1]}"
+            )
+        self.index.add_with_ids(vectors, np.array(ids))
         await self.save_index()

     async def search(self, vector: np.ndarray, k: int) -> tuple:
@@ -46,10 +62,22 @@ class EmbeddingStorage:
         Returns:
             tuple: (距离, 索引)
         """
+        assert self.index is not None, "FAISS index is not initialized."
         faiss.normalize_L2(vector)
         distances, indices = self.index.search(vector, k)
         return distances, indices

+    async def delete(self, ids: list[int]):
+        """删除向量
+
+        Args:
+            ids (list[int]): 要删除的向量ID列表
+        """
+        assert self.index is not None, "FAISS index is not initialized."
+        id_array = np.array(ids, dtype=np.int64)
+        self.index.remove_ids(id_array)
+        await self.save_index()
+
     async def save_index(self):
         """保存索引
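`EmbeddingStorage` keys every vector by an external integer ID via `faiss.IndexIDMap`, which is what lets it share IDs with the SQLite `documents` table and support deletion by ID. A minimal standalone sketch of that pattern (toy dimensions and IDs):

```python
import faiss
import numpy as np

dim = 4
index = faiss.IndexIDMap(faiss.IndexFlatL2(dim))

vecs = np.random.rand(3, dim).astype("float32")
ids = np.array([101, 102, 103], dtype=np.int64)  # e.g. SQLite row IDs
index.add_with_ids(vecs, ids)

# search returns the mapped IDs, not insertion positions
distances, found_ids = index.search(vecs[:1], 2)
print(found_ids)  # first hit is 101 itself

# deletion by ID, as EmbeddingStorage.delete() does
index.remove_ids(np.array([102], dtype=np.int64))
print(index.ntotal)  # 2
```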
astrbot/core/db/vec_db/faiss_impl/vec_db.py
CHANGED
@@ -1,11 +1,12 @@
 import uuid
-import
+import time
 import numpy as np
 from .document_storage import DocumentStorage
 from .embedding_storage import EmbeddingStorage
 from ..base import Result, BaseVecDB
 from astrbot.core.provider.provider import EmbeddingProvider
 from astrbot.core.provider.provider import RerankProvider
+from astrbot import logger


 class FaissVecDB(BaseVecDB):
@@ -44,18 +45,56 @@ class FaissVecDB(BaseVecDB):

         vector = await self.embedding_provider.get_embedding(content)
         vector = np.array(vector, dtype=np.float32)
-        async with self.document_storage.connection.cursor() as cursor:
-            await cursor.execute(
-                "INSERT INTO documents (doc_id, text, metadata) VALUES (?, ?, ?)",
-                (str_id, content, json.dumps(metadata)),
-            )
-            await self.document_storage.connection.commit()
-            result = await self.document_storage.get_document_by_doc_id(str_id)
-            int_id = result["id"]

-
-
-
+        # 使用 DocumentStorage 的方法插入文档
+        int_id = await self.document_storage.insert_document(str_id, content, metadata)
+
+        # 插入向量到 FAISS
+        await self.embedding_storage.insert(vector, int_id)
+        return int_id
+
+    async def insert_batch(
+        self,
+        contents: list[str],
+        metadatas: list[dict] | None = None,
+        ids: list[str] | None = None,
+        batch_size: int = 32,
+        tasks_limit: int = 3,
+        max_retries: int = 3,
+        progress_callback=None,
+    ) -> list[int]:
+        """
+        批量插入文本和其对应向量,自动生成 ID 并保持一致性。
+
+        Args:
+            progress_callback: 进度回调函数,接收参数 (current, total)
+        """
+        metadatas = metadatas or [{} for _ in contents]
+        ids = ids or [str(uuid.uuid4()) for _ in contents]
+
+        start = time.time()
+        logger.debug(f"Generating embeddings for {len(contents)} contents...")
+        vectors = await self.embedding_provider.get_embeddings_batch(
+            contents,
+            batch_size=batch_size,
+            tasks_limit=tasks_limit,
+            max_retries=max_retries,
+            progress_callback=progress_callback,
+        )
+        end = time.time()
+        logger.debug(
+            f"Generated embeddings for {len(contents)} contents in {end - start:.2f} seconds."
+        )
+
+        # 使用 DocumentStorage 的批量插入方法
+        int_ids = await self.document_storage.insert_documents_batch(
+            ids, contents, metadatas
+        )
+
+        # 批量插入向量到 FAISS
+        vectors_array = np.array(vectors).astype("float32")
+        await self.embedding_storage.insert_batch(vectors_array, int_ids)
+        return int_ids

     async def retrieve(
         self,
@@ -119,23 +158,42 @@ class FaissVecDB(BaseVecDB):

         return top_k_results

-    async def delete(self, doc_id:
+    async def delete(self, doc_id: str):
         """
-
+        删除一条文档块(chunk)
         """
-
-
-
-
+        # 获得对应的 int id
+        result = await self.document_storage.get_document_by_doc_id(doc_id)
+        int_id = result["id"] if result else None
+        if int_id is None:
+            return
+
+        # 使用 DocumentStorage 的删除方法
+        await self.document_storage.delete_document_by_doc_id(doc_id)
+        await self.embedding_storage.delete([int_id])

     async def close(self):
         await self.document_storage.close()

-    async def count_documents(self) -> int:
+    async def count_documents(self, metadata_filter: dict | None = None) -> int:
         """
         计算文档数量
+
+        Args:
+            metadata_filter (dict | None): 元数据过滤器
         """
-
-
-
-
+        count = await self.document_storage.count_documents(
+            metadata_filters=metadata_filter or {}
+        )
+        return count
+
+    async def delete_documents(self, metadata_filters: dict):
+        """
+        根据元数据过滤器删除文档
+        """
+        docs = await self.document_storage.get_documents(
+            metadata_filters=metadata_filters, offset=None, limit=None
+        )
+        doc_ids: list[int] = [doc["id"] for doc in docs]
+        await self.embedding_storage.delete(doc_ids)
+        await self.document_storage.delete_documents(metadata_filters=metadata_filters)