AstrBot 3.5.6__py3-none-any.whl → 4.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- astrbot/api/__init__.py +16 -4
- astrbot/api/all.py +2 -1
- astrbot/api/event/__init__.py +5 -6
- astrbot/api/event/filter/__init__.py +37 -34
- astrbot/api/platform/__init__.py +7 -8
- astrbot/api/provider/__init__.py +8 -7
- astrbot/api/star/__init__.py +3 -4
- astrbot/api/util/__init__.py +2 -2
- astrbot/cli/__init__.py +1 -0
- astrbot/cli/__main__.py +18 -197
- astrbot/cli/commands/__init__.py +6 -0
- astrbot/cli/commands/cmd_conf.py +209 -0
- astrbot/cli/commands/cmd_init.py +56 -0
- astrbot/cli/commands/cmd_plug.py +245 -0
- astrbot/cli/commands/cmd_run.py +62 -0
- astrbot/cli/utils/__init__.py +18 -0
- astrbot/cli/utils/basic.py +76 -0
- astrbot/cli/utils/plugin.py +246 -0
- astrbot/cli/utils/version_comparator.py +90 -0
- astrbot/core/__init__.py +17 -19
- astrbot/core/agent/agent.py +14 -0
- astrbot/core/agent/handoff.py +38 -0
- astrbot/core/agent/hooks.py +30 -0
- astrbot/core/agent/mcp_client.py +385 -0
- astrbot/core/agent/message.py +175 -0
- astrbot/core/agent/response.py +14 -0
- astrbot/core/agent/run_context.py +22 -0
- astrbot/core/agent/runners/__init__.py +3 -0
- astrbot/core/agent/runners/base.py +65 -0
- astrbot/core/agent/runners/coze/coze_agent_runner.py +367 -0
- astrbot/core/agent/runners/coze/coze_api_client.py +324 -0
- astrbot/core/agent/runners/dashscope/dashscope_agent_runner.py +403 -0
- astrbot/core/agent/runners/dify/dify_agent_runner.py +336 -0
- astrbot/core/agent/runners/dify/dify_api_client.py +195 -0
- astrbot/core/agent/runners/tool_loop_agent_runner.py +400 -0
- astrbot/core/agent/tool.py +285 -0
- astrbot/core/agent/tool_executor.py +17 -0
- astrbot/core/astr_agent_context.py +19 -0
- astrbot/core/astr_agent_hooks.py +36 -0
- astrbot/core/astr_agent_run_util.py +80 -0
- astrbot/core/astr_agent_tool_exec.py +246 -0
- astrbot/core/astrbot_config_mgr.py +275 -0
- astrbot/core/config/__init__.py +2 -2
- astrbot/core/config/astrbot_config.py +60 -20
- astrbot/core/config/default.py +1972 -453
- astrbot/core/config/i18n_utils.py +110 -0
- astrbot/core/conversation_mgr.py +285 -75
- astrbot/core/core_lifecycle.py +167 -62
- astrbot/core/db/__init__.py +305 -102
- astrbot/core/db/migration/helper.py +69 -0
- astrbot/core/db/migration/migra_3_to_4.py +357 -0
- astrbot/core/db/migration/migra_45_to_46.py +44 -0
- astrbot/core/db/migration/migra_webchat_session.py +131 -0
- astrbot/core/db/migration/shared_preferences_v3.py +48 -0
- astrbot/core/db/migration/sqlite_v3.py +497 -0
- astrbot/core/db/po.py +259 -55
- astrbot/core/db/sqlite.py +773 -528
- astrbot/core/db/vec_db/base.py +73 -0
- astrbot/core/db/vec_db/faiss_impl/__init__.py +3 -0
- astrbot/core/db/vec_db/faiss_impl/document_storage.py +392 -0
- astrbot/core/db/vec_db/faiss_impl/embedding_storage.py +93 -0
- astrbot/core/db/vec_db/faiss_impl/sqlite_init.sql +17 -0
- astrbot/core/db/vec_db/faiss_impl/vec_db.py +204 -0
- astrbot/core/event_bus.py +26 -22
- astrbot/core/exceptions.py +9 -0
- astrbot/core/file_token_service.py +98 -0
- astrbot/core/initial_loader.py +19 -10
- astrbot/core/knowledge_base/chunking/__init__.py +9 -0
- astrbot/core/knowledge_base/chunking/base.py +25 -0
- astrbot/core/knowledge_base/chunking/fixed_size.py +59 -0
- astrbot/core/knowledge_base/chunking/recursive.py +161 -0
- astrbot/core/knowledge_base/kb_db_sqlite.py +301 -0
- astrbot/core/knowledge_base/kb_helper.py +642 -0
- astrbot/core/knowledge_base/kb_mgr.py +330 -0
- astrbot/core/knowledge_base/models.py +120 -0
- astrbot/core/knowledge_base/parsers/__init__.py +13 -0
- astrbot/core/knowledge_base/parsers/base.py +51 -0
- astrbot/core/knowledge_base/parsers/markitdown_parser.py +26 -0
- astrbot/core/knowledge_base/parsers/pdf_parser.py +101 -0
- astrbot/core/knowledge_base/parsers/text_parser.py +42 -0
- astrbot/core/knowledge_base/parsers/url_parser.py +103 -0
- astrbot/core/knowledge_base/parsers/util.py +13 -0
- astrbot/core/knowledge_base/prompts.py +65 -0
- astrbot/core/knowledge_base/retrieval/__init__.py +14 -0
- astrbot/core/knowledge_base/retrieval/hit_stopwords.txt +767 -0
- astrbot/core/knowledge_base/retrieval/manager.py +276 -0
- astrbot/core/knowledge_base/retrieval/rank_fusion.py +142 -0
- astrbot/core/knowledge_base/retrieval/sparse_retriever.py +136 -0
- astrbot/core/log.py +21 -15
- astrbot/core/message/components.py +413 -287
- astrbot/core/message/message_event_result.py +35 -24
- astrbot/core/persona_mgr.py +192 -0
- astrbot/core/pipeline/__init__.py +14 -14
- astrbot/core/pipeline/content_safety_check/stage.py +13 -9
- astrbot/core/pipeline/content_safety_check/strategies/__init__.py +1 -2
- astrbot/core/pipeline/content_safety_check/strategies/baidu_aip.py +13 -14
- astrbot/core/pipeline/content_safety_check/strategies/keywords.py +2 -1
- astrbot/core/pipeline/content_safety_check/strategies/strategy.py +6 -6
- astrbot/core/pipeline/context.py +7 -1
- astrbot/core/pipeline/context_utils.py +107 -0
- astrbot/core/pipeline/preprocess_stage/stage.py +63 -36
- astrbot/core/pipeline/process_stage/method/agent_request.py +48 -0
- astrbot/core/pipeline/process_stage/method/agent_sub_stages/internal.py +464 -0
- astrbot/core/pipeline/process_stage/method/agent_sub_stages/third_party.py +202 -0
- astrbot/core/pipeline/process_stage/method/star_request.py +26 -32
- astrbot/core/pipeline/process_stage/stage.py +21 -15
- astrbot/core/pipeline/process_stage/utils.py +125 -0
- astrbot/core/pipeline/rate_limit_check/stage.py +34 -36
- astrbot/core/pipeline/respond/stage.py +142 -101
- astrbot/core/pipeline/result_decorate/stage.py +124 -57
- astrbot/core/pipeline/scheduler.py +21 -16
- astrbot/core/pipeline/session_status_check/stage.py +37 -0
- astrbot/core/pipeline/stage.py +11 -76
- astrbot/core/pipeline/waking_check/stage.py +69 -33
- astrbot/core/pipeline/whitelist_check/stage.py +10 -7
- astrbot/core/platform/__init__.py +6 -6
- astrbot/core/platform/astr_message_event.py +107 -129
- astrbot/core/platform/astrbot_message.py +32 -12
- astrbot/core/platform/manager.py +62 -18
- astrbot/core/platform/message_session.py +30 -0
- astrbot/core/platform/platform.py +16 -24
- astrbot/core/platform/platform_metadata.py +9 -4
- astrbot/core/platform/register.py +12 -7
- astrbot/core/platform/sources/aiocqhttp/aiocqhttp_message_event.py +136 -60
- astrbot/core/platform/sources/aiocqhttp/aiocqhttp_platform_adapter.py +126 -46
- astrbot/core/platform/sources/dingtalk/dingtalk_adapter.py +63 -31
- astrbot/core/platform/sources/dingtalk/dingtalk_event.py +30 -26
- astrbot/core/platform/sources/discord/client.py +129 -0
- astrbot/core/platform/sources/discord/components.py +139 -0
- astrbot/core/platform/sources/discord/discord_platform_adapter.py +473 -0
- astrbot/core/platform/sources/discord/discord_platform_event.py +313 -0
- astrbot/core/platform/sources/lark/lark_adapter.py +27 -18
- astrbot/core/platform/sources/lark/lark_event.py +39 -13
- astrbot/core/platform/sources/misskey/misskey_adapter.py +770 -0
- astrbot/core/platform/sources/misskey/misskey_api.py +964 -0
- astrbot/core/platform/sources/misskey/misskey_event.py +163 -0
- astrbot/core/platform/sources/misskey/misskey_utils.py +550 -0
- astrbot/core/platform/sources/qqofficial/qqofficial_message_event.py +149 -33
- astrbot/core/platform/sources/qqofficial/qqofficial_platform_adapter.py +41 -26
- astrbot/core/platform/sources/qqofficial_webhook/qo_webhook_adapter.py +36 -17
- astrbot/core/platform/sources/qqofficial_webhook/qo_webhook_event.py +3 -1
- astrbot/core/platform/sources/qqofficial_webhook/qo_webhook_server.py +14 -8
- astrbot/core/platform/sources/satori/satori_adapter.py +792 -0
- astrbot/core/platform/sources/satori/satori_event.py +432 -0
- astrbot/core/platform/sources/slack/client.py +164 -0
- astrbot/core/platform/sources/slack/slack_adapter.py +416 -0
- astrbot/core/platform/sources/slack/slack_event.py +253 -0
- astrbot/core/platform/sources/telegram/tg_adapter.py +100 -43
- astrbot/core/platform/sources/telegram/tg_event.py +136 -36
- astrbot/core/platform/sources/webchat/webchat_adapter.py +72 -22
- astrbot/core/platform/sources/webchat/webchat_event.py +46 -22
- astrbot/core/platform/sources/webchat/webchat_queue_mgr.py +35 -0
- astrbot/core/platform/sources/wechatpadpro/wechatpadpro_adapter.py +926 -0
- astrbot/core/platform/sources/wechatpadpro/wechatpadpro_message_event.py +178 -0
- astrbot/core/platform/sources/wechatpadpro/xml_data_parser.py +159 -0
- astrbot/core/platform/sources/wecom/wecom_adapter.py +169 -27
- astrbot/core/platform/sources/wecom/wecom_event.py +162 -77
- astrbot/core/platform/sources/wecom/wecom_kf.py +279 -0
- astrbot/core/platform/sources/wecom/wecom_kf_message.py +196 -0
- astrbot/core/platform/sources/wecom_ai_bot/WXBizJsonMsgCrypt.py +297 -0
- astrbot/core/platform/sources/wecom_ai_bot/__init__.py +15 -0
- astrbot/core/platform/sources/wecom_ai_bot/ierror.py +19 -0
- astrbot/core/platform/sources/wecom_ai_bot/wecomai_adapter.py +472 -0
- astrbot/core/platform/sources/wecom_ai_bot/wecomai_api.py +417 -0
- astrbot/core/platform/sources/wecom_ai_bot/wecomai_event.py +152 -0
- astrbot/core/platform/sources/wecom_ai_bot/wecomai_queue_mgr.py +153 -0
- astrbot/core/platform/sources/wecom_ai_bot/wecomai_server.py +168 -0
- astrbot/core/platform/sources/wecom_ai_bot/wecomai_utils.py +209 -0
- astrbot/core/platform/sources/weixin_official_account/weixin_offacc_adapter.py +306 -0
- astrbot/core/platform/sources/weixin_official_account/weixin_offacc_event.py +186 -0
- astrbot/core/platform_message_history_mgr.py +49 -0
- astrbot/core/provider/__init__.py +2 -3
- astrbot/core/provider/entites.py +8 -8
- astrbot/core/provider/entities.py +154 -98
- astrbot/core/provider/func_tool_manager.py +446 -458
- astrbot/core/provider/manager.py +345 -207
- astrbot/core/provider/provider.py +188 -73
- astrbot/core/provider/register.py +9 -7
- astrbot/core/provider/sources/anthropic_source.py +295 -115
- astrbot/core/provider/sources/azure_tts_source.py +224 -0
- astrbot/core/provider/sources/bailian_rerank_source.py +236 -0
- astrbot/core/provider/sources/dashscope_tts.py +138 -14
- astrbot/core/provider/sources/edge_tts_source.py +24 -19
- astrbot/core/provider/sources/fishaudio_tts_api_source.py +58 -13
- astrbot/core/provider/sources/gemini_embedding_source.py +61 -0
- astrbot/core/provider/sources/gemini_source.py +310 -132
- astrbot/core/provider/sources/gemini_tts_source.py +81 -0
- astrbot/core/provider/sources/groq_source.py +15 -0
- astrbot/core/provider/sources/gsv_selfhosted_source.py +151 -0
- astrbot/core/provider/sources/gsvi_tts_source.py +14 -7
- astrbot/core/provider/sources/minimax_tts_api_source.py +159 -0
- astrbot/core/provider/sources/openai_embedding_source.py +40 -0
- astrbot/core/provider/sources/openai_source.py +241 -145
- astrbot/core/provider/sources/openai_tts_api_source.py +18 -7
- astrbot/core/provider/sources/sensevoice_selfhosted_source.py +13 -11
- astrbot/core/provider/sources/vllm_rerank_source.py +71 -0
- astrbot/core/provider/sources/volcengine_tts.py +115 -0
- astrbot/core/provider/sources/whisper_api_source.py +18 -13
- astrbot/core/provider/sources/whisper_selfhosted_source.py +19 -12
- astrbot/core/provider/sources/xinference_rerank_source.py +116 -0
- astrbot/core/provider/sources/xinference_stt_provider.py +197 -0
- astrbot/core/provider/sources/zhipu_source.py +6 -73
- astrbot/core/star/__init__.py +43 -11
- astrbot/core/star/config.py +17 -18
- astrbot/core/star/context.py +362 -138
- astrbot/core/star/filter/__init__.py +4 -3
- astrbot/core/star/filter/command.py +111 -35
- astrbot/core/star/filter/command_group.py +46 -34
- astrbot/core/star/filter/custom_filter.py +6 -5
- astrbot/core/star/filter/event_message_type.py +4 -2
- astrbot/core/star/filter/permission.py +4 -2
- astrbot/core/star/filter/platform_adapter_type.py +45 -12
- astrbot/core/star/filter/regex.py +4 -2
- astrbot/core/star/register/__init__.py +19 -15
- astrbot/core/star/register/star.py +41 -13
- astrbot/core/star/register/star_handler.py +236 -86
- astrbot/core/star/session_llm_manager.py +280 -0
- astrbot/core/star/session_plugin_manager.py +170 -0
- astrbot/core/star/star.py +36 -43
- astrbot/core/star/star_handler.py +47 -85
- astrbot/core/star/star_manager.py +442 -260
- astrbot/core/star/star_tools.py +167 -45
- astrbot/core/star/updator.py +17 -20
- astrbot/core/umop_config_router.py +106 -0
- astrbot/core/updator.py +38 -13
- astrbot/core/utils/astrbot_path.py +39 -0
- astrbot/core/utils/command_parser.py +1 -1
- astrbot/core/utils/io.py +119 -60
- astrbot/core/utils/log_pipe.py +1 -1
- astrbot/core/utils/metrics.py +11 -10
- astrbot/core/utils/migra_helper.py +73 -0
- astrbot/core/utils/path_util.py +63 -62
- astrbot/core/utils/pip_installer.py +37 -15
- astrbot/core/utils/session_lock.py +29 -0
- astrbot/core/utils/session_waiter.py +19 -20
- astrbot/core/utils/shared_preferences.py +174 -34
- astrbot/core/utils/t2i/__init__.py +4 -1
- astrbot/core/utils/t2i/local_strategy.py +386 -238
- astrbot/core/utils/t2i/network_strategy.py +109 -49
- astrbot/core/utils/t2i/renderer.py +29 -14
- astrbot/core/utils/t2i/template/astrbot_powershell.html +184 -0
- astrbot/core/utils/t2i/template_manager.py +111 -0
- astrbot/core/utils/tencent_record_helper.py +115 -1
- astrbot/core/utils/version_comparator.py +10 -13
- astrbot/core/zip_updator.py +112 -65
- astrbot/dashboard/routes/__init__.py +20 -13
- astrbot/dashboard/routes/auth.py +20 -9
- astrbot/dashboard/routes/chat.py +297 -141
- astrbot/dashboard/routes/config.py +652 -55
- astrbot/dashboard/routes/conversation.py +107 -37
- astrbot/dashboard/routes/file.py +26 -0
- astrbot/dashboard/routes/knowledge_base.py +1244 -0
- astrbot/dashboard/routes/log.py +27 -2
- astrbot/dashboard/routes/persona.py +202 -0
- astrbot/dashboard/routes/plugin.py +197 -139
- astrbot/dashboard/routes/route.py +27 -7
- astrbot/dashboard/routes/session_management.py +354 -0
- astrbot/dashboard/routes/stat.py +85 -18
- astrbot/dashboard/routes/static_file.py +5 -2
- astrbot/dashboard/routes/t2i.py +233 -0
- astrbot/dashboard/routes/tools.py +184 -120
- astrbot/dashboard/routes/update.py +59 -36
- astrbot/dashboard/server.py +96 -36
- astrbot/dashboard/utils.py +165 -0
- astrbot-4.7.0.dist-info/METADATA +294 -0
- astrbot-4.7.0.dist-info/RECORD +274 -0
- {astrbot-3.5.6.dist-info → astrbot-4.7.0.dist-info}/WHEEL +1 -1
- astrbot/core/db/plugin/sqlite_impl.py +0 -112
- astrbot/core/db/sqlite_init.sql +0 -50
- astrbot/core/pipeline/platform_compatibility/stage.py +0 -56
- astrbot/core/pipeline/process_stage/method/llm_request.py +0 -606
- astrbot/core/platform/sources/gewechat/client.py +0 -806
- astrbot/core/platform/sources/gewechat/downloader.py +0 -55
- astrbot/core/platform/sources/gewechat/gewechat_event.py +0 -255
- astrbot/core/platform/sources/gewechat/gewechat_platform_adapter.py +0 -103
- astrbot/core/platform/sources/gewechat/xml_data_parser.py +0 -110
- astrbot/core/provider/sources/dashscope_source.py +0 -203
- astrbot/core/provider/sources/dify_source.py +0 -281
- astrbot/core/provider/sources/llmtuner_source.py +0 -132
- astrbot/core/rag/embedding/openai_source.py +0 -20
- astrbot/core/rag/knowledge_db_mgr.py +0 -94
- astrbot/core/rag/store/__init__.py +0 -9
- astrbot/core/rag/store/chroma_db.py +0 -42
- astrbot/core/utils/dify_api_client.py +0 -152
- astrbot-3.5.6.dist-info/METADATA +0 -249
- astrbot-3.5.6.dist-info/RECORD +0 -158
- {astrbot-3.5.6.dist-info → astrbot-4.7.0.dist-info}/entry_points.txt +0 -0
- {astrbot-3.5.6.dist-info → astrbot-4.7.0.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
import time
|
|
2
|
+
import uuid
|
|
3
|
+
|
|
4
|
+
import numpy as np
|
|
5
|
+
|
|
6
|
+
from astrbot import logger
|
|
7
|
+
from astrbot.core.provider.provider import EmbeddingProvider, RerankProvider
|
|
8
|
+
|
|
9
|
+
from ..base import BaseVecDB, Result
|
|
10
|
+
from .document_storage import DocumentStorage
|
|
11
|
+
from .embedding_storage import EmbeddingStorage
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class FaissVecDB(BaseVecDB):
    """Vector database that pairs a document store with a FAISS embedding index.

    Text and metadata live in ``DocumentStorage``; the matching vectors live in
    ``EmbeddingStorage`` and are keyed by the integer id that the document
    store returns on insertion.
    """

    def __init__(
        self,
        doc_store_path: str,
        index_store_path: str,
        embedding_provider: EmbeddingProvider,
        rerank_provider: RerankProvider | None = None,
    ):
        """Create the two storages backing this database.

        Args:
            doc_store_path: Path handed to ``DocumentStorage``.
            index_store_path: Path handed to ``EmbeddingStorage``.
            embedding_provider: Provider used to embed documents and queries;
                its ``get_dim()`` fixes the dimension of the index.
            rerank_provider: Optional provider used by ``retrieve`` when
                ``rerank=True``.

        """
        self.doc_store_path = doc_store_path
        self.index_store_path = index_store_path
        self.embedding_provider = embedding_provider
        self.rerank_provider = rerank_provider
        self.document_storage = DocumentStorage(doc_store_path)
        self.embedding_storage = EmbeddingStorage(
            embedding_provider.get_dim(),
            index_store_path,
        )

    async def initialize(self):
        """Initialize the underlying document storage."""
        await self.document_storage.initialize()

    async def insert(
        self,
        content: str,
        metadata: dict | None = None,
        id: str | None = None,
    ) -> int:
        """Insert one text together with its embedding vector.

        Args:
            content: Text to embed and store.
            metadata: Optional metadata stored with the document.
            id: Optional string id; a fresh UUID4 string is generated when omitted.

        Returns:
            int: The integer id assigned by the document storage, which is
            also the id the vector is stored under.

        """
        metadata = metadata or {}
        str_id = id or str(uuid.uuid4())  # a UUID serves as the original id

        vector = await self.embedding_provider.get_embedding(content)
        vector = np.array(vector, dtype=np.float32)

        # Store the document first: its integer id keys the vector.
        int_id = await self.document_storage.insert_document(str_id, content, metadata)

        # Insert the vector into FAISS.
        await self.embedding_storage.insert(vector, int_id)
        return int_id

    async def insert_batch(
        self,
        contents: list[str],
        metadatas: list[dict] | None = None,
        ids: list[str] | None = None,
        batch_size: int = 32,
        tasks_limit: int = 3,
        max_retries: int = 3,
        progress_callback=None,
    ) -> list[int]:
        """Insert many texts together with their embedding vectors.

        Args:
            contents: Texts to embed and store.
            metadatas: Optional per-text metadata; defaults to empty dicts.
            ids: Optional per-text string ids; defaults to fresh UUID4 strings.
            batch_size: Forwarded to ``get_embeddings_batch``.
            tasks_limit: Forwarded to ``get_embeddings_batch``.
            max_retries: Forwarded to ``get_embeddings_batch``.
            progress_callback: Progress callback, called with (current, total).

        Returns:
            list[int]: Integer ids assigned by the document storage. Empty
            when ``contents`` is empty.

        """
        # Nothing to embed or store: avoid calling the provider and handing
        # a shapeless empty array to the vector index.
        if not contents:
            return []

        metadatas = metadatas or [{} for _ in contents]
        ids = ids or [str(uuid.uuid4()) for _ in contents]

        start = time.time()
        logger.debug(f"Generating embeddings for {len(contents)} contents...")
        vectors = await self.embedding_provider.get_embeddings_batch(
            contents,
            batch_size=batch_size,
            tasks_limit=tasks_limit,
            max_retries=max_retries,
            progress_callback=progress_callback,
        )
        end = time.time()
        logger.debug(
            f"Generated embeddings for {len(contents)} contents in {end - start:.2f} seconds.",
        )

        # Batch-insert the documents through DocumentStorage.
        int_ids = await self.document_storage.insert_documents_batch(
            ids,
            contents,
            metadatas,
        )

        # Batch-insert the vectors into FAISS.
        vectors_array = np.array(vectors).astype("float32")
        await self.embedding_storage.insert_batch(vectors_array, int_ids)
        return int_ids

    async def retrieve(
        self,
        query: str,
        k: int = 5,
        fetch_k: int = 20,
        rerank: bool = False,
        metadata_filters: dict | None = None,
    ) -> list[Result]:
        """Search for the documents most similar to ``query``.

        Args:
            query (str): Query text.
            k (int): Number of most similar documents to return.
            fetch_k (int): Number of candidates fetched from FAISS before
                metadata filtering; only used when ``metadata_filters`` is given.
            rerank (bool): Whether to rerank the results. This needs a
                ``rerank_provider`` supplied at construction; when none was
                supplied, ``rerank=True`` is ignored without raising.
            metadata_filters (dict): Metadata filters.

        Returns:
            List[Result]: Query results (possibly fewer than ``k``).

        """
        embedding = await self.embedding_provider.get_embedding(query)
        scores, indices = await self.embedding_storage.search(
            vector=np.array([embedding]).astype("float32"),
            k=fetch_k if metadata_filters else k,
        )
        if len(indices[0]) == 0 or indices[0][0] == -1:
            return []
        # Normalize scores.
        # NOTE(review): maps the raw index score d to 1 - d/2; presumably this
        # assumes distances in [0, 2] - confirm against EmbeddingStorage.
        scores[0] = 1.0 - (scores[0] / 2.0)
        # NOTE: maybe the size is less than k.
        fetched_docs = await self.document_storage.get_documents(
            metadata_filters=metadata_filters or {},
            ids=indices[0],
        )
        if not fetched_docs:
            return []
        result_docs: list[Result] = []

        # Walk the hits in index order so results keep the index ranking;
        # hits without a fetched document (e.g. filtered out) are skipped.
        idx_pos = {fetch_doc["id"]: idx for idx, fetch_doc in enumerate(fetched_docs)}
        for i, indice_idx in enumerate(indices[0]):
            pos = idx_pos.get(indice_idx)
            if pos is None:
                continue
            fetch_doc = fetched_docs[pos]
            score = scores[0][i]
            result_docs.append(Result(similarity=float(score), data=fetch_doc))

        top_k_results = result_docs[:k]

        if rerank and self.rerank_provider:
            documents = [doc.data["text"] for doc in top_k_results]
            reranked_results = await self.rerank_provider.rerank(query, documents)
            reranked_results = sorted(
                reranked_results,
                key=lambda x: x.relevance_score,
                reverse=True,
            )
            # Reorder by relevance; each reranked item points back into
            # top_k_results through its ``index``.
            top_k_results = [
                top_k_results[reranked_result.index]
                for reranked_result in reranked_results
            ]

        return top_k_results

    async def delete(self, doc_id: str):
        """Delete one document chunk, identified by its string id.

        Does nothing when no document with ``doc_id`` is found.
        """
        # Look up the matching integer id.
        result = await self.document_storage.get_document_by_doc_id(doc_id)
        int_id = result["id"] if result else None
        if int_id is None:
            return

        # Remove the document, then its vector.
        await self.document_storage.delete_document_by_doc_id(doc_id)
        await self.embedding_storage.delete([int_id])

    async def close(self):
        """Close the underlying document storage."""
        await self.document_storage.close()

    async def count_documents(self, metadata_filter: dict | None = None) -> int:
        """Count stored documents.

        Args:
            metadata_filter (dict | None): Metadata filter; no filter when omitted.

        Returns:
            int: The count reported by the document storage.

        """
        count = await self.document_storage.count_documents(
            metadata_filters=metadata_filter or {},
        )
        return count

    async def delete_documents(self, metadata_filters: dict):
        """Delete every document matching ``metadata_filters``, with its vector."""
        docs = await self.document_storage.get_documents(
            metadata_filters=metadata_filters,
            offset=None,
            limit=None,
        )
        doc_ids: list[int] = [doc["id"] for doc in docs]
        await self.embedding_storage.delete(doc_ids)
        await self.document_storage.delete_documents(metadata_filters=metadata_filters)
|
astrbot/core/event_bus.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
"""
|
|
2
|
-
事件总线, 用于处理事件的分发和处理
|
|
1
|
+
"""事件总线, 用于处理事件的分发和处理
|
|
3
2
|
事件总线是一个异步队列, 用于接收各种消息事件, 并将其发送到Scheduler调度器进行处理
|
|
4
3
|
其中包含了一个无限循环的调度函数, 用于从事件队列中获取新的事件, 并创建一个新的异步任务来执行管道调度器的处理逻辑
|
|
5
4
|
|
|
@@ -13,45 +12,50 @@ class:
|
|
|
13
12
|
|
|
14
13
|
import asyncio
|
|
15
14
|
from asyncio import Queue
|
|
16
|
-
|
|
15
|
+
|
|
17
16
|
from astrbot.core import logger
|
|
17
|
+
from astrbot.core.astrbot_config_mgr import AstrBotConfigManager
|
|
18
|
+
from astrbot.core.pipeline.scheduler import PipelineScheduler
|
|
19
|
+
|
|
18
20
|
from .platform import AstrMessageEvent
|
|
19
21
|
|
|
20
22
|
|
|
21
23
|
class EventBus:
    """Dispatch queued message events to the pipeline scheduler of their config."""

    def __init__(
        self,
        event_queue: Queue,
        pipeline_scheduler_mapping: dict[str, PipelineScheduler],
        astrbot_config_mgr: AstrBotConfigManager | None = None,
    ):
        """Store the queue, the scheduler mapping and the config manager.

        Args:
            event_queue: Queue the message events are read from.
            pipeline_scheduler_mapping: Maps a config id to its scheduler.
            astrbot_config_mgr: Config manager used to resolve the config
                info of each event; ``dispatch`` requires it to be set.

        """
        self.event_queue = event_queue  # event queue
        # abconf uuid -> scheduler
        self.pipeline_scheduler_mapping = pipeline_scheduler_mapping
        self.astrbot_config_mgr = astrbot_config_mgr
        # Strong references to in-flight tasks: the event loop itself only
        # keeps weak references, so an unreferenced task may be collected
        # before it finishes.
        self._running_tasks: set[asyncio.Task] = set()

    async def dispatch(self):
        """Run forever: take events off the queue and schedule their pipeline.

        For each event the config info is resolved from its
        ``unified_msg_origin``, the event is logged, and the scheduler
        registered for that config id is run in a new task. Events whose
        config id has no scheduler are logged and dropped instead of
        terminating the loop.
        """
        while True:
            event: AstrMessageEvent = await self.event_queue.get()
            conf_info = self.astrbot_config_mgr.get_conf_info(event.unified_msg_origin)
            self._print_event(event, conf_info["name"])
            scheduler = self.pipeline_scheduler_mapping.get(conf_info["id"])
            if scheduler is None:
                logger.error(
                    f"No pipeline scheduler found for config {conf_info['id']}, event dropped.",
                )
                continue
            task = asyncio.create_task(scheduler.execute(event))
            self._running_tasks.add(task)
            task.add_done_callback(self._running_tasks.discard)

    def _print_event(self, event: AstrMessageEvent, conf_name: str):
        """Log a one-line summary of an event.

        Args:
            event (AstrMessageEvent): The event object.
            conf_name (str): Name of the config the event is handled under.

        """
        # With a sender name: [config] [platform id(platform name)] name/id: outline
        if event.get_sender_name():
            logger.info(
                f"[{conf_name}] [{event.get_platform_id()}({event.get_platform_name()})] {event.get_sender_name()}/{event.get_sender_id()}: {event.get_message_outline()}",
            )
        # Without a sender name: [config] [platform id(platform name)] id: outline
        else:
            logger.info(
                f"[{conf_name}] [{event.get_platform_id()}({event.get_platform_name()})] {event.get_sender_id()}: {event.get_message_outline()}",
            )
|
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
import os
|
|
3
|
+
import platform
|
|
4
|
+
import time
|
|
5
|
+
import uuid
|
|
6
|
+
from urllib.parse import unquote, urlparse
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class FileTokenService:
|
|
10
|
+
"""维护一个简单的基于令牌的文件下载服务,支持超时和懒清除。"""
|
|
11
|
+
|
|
12
|
+
def __init__(self, default_timeout: float = 300):
|
|
13
|
+
self.lock = asyncio.Lock()
|
|
14
|
+
self.staged_files = {} # token: (file_path, expire_time)
|
|
15
|
+
self.default_timeout = default_timeout
|
|
16
|
+
|
|
17
|
+
async def _cleanup_expired_tokens(self):
|
|
18
|
+
"""清理过期的令牌"""
|
|
19
|
+
now = time.time()
|
|
20
|
+
expired_tokens = [
|
|
21
|
+
token for token, (_, expire) in self.staged_files.items() if expire < now
|
|
22
|
+
]
|
|
23
|
+
for token in expired_tokens:
|
|
24
|
+
self.staged_files.pop(token, None)
|
|
25
|
+
|
|
26
|
+
async def check_token_expired(self, file_token: str) -> bool:
|
|
27
|
+
async with self.lock:
|
|
28
|
+
await self._cleanup_expired_tokens()
|
|
29
|
+
return file_token not in self.staged_files
|
|
30
|
+
|
|
31
|
+
async def register_file(self, file_path: str, timeout: float | None = None) -> str:
|
|
32
|
+
"""向令牌服务注册一个文件。
|
|
33
|
+
|
|
34
|
+
Args:
|
|
35
|
+
file_path(str): 文件路径
|
|
36
|
+
timeout(float): 超时时间,单位秒(可选)
|
|
37
|
+
|
|
38
|
+
Returns:
|
|
39
|
+
str: 一个单次令牌
|
|
40
|
+
|
|
41
|
+
Raises:
|
|
42
|
+
FileNotFoundError: 当路径不存在时抛出
|
|
43
|
+
|
|
44
|
+
"""
|
|
45
|
+
# 处理 file:///
|
|
46
|
+
try:
|
|
47
|
+
parsed_uri = urlparse(file_path)
|
|
48
|
+
if parsed_uri.scheme == "file":
|
|
49
|
+
local_path = unquote(parsed_uri.path)
|
|
50
|
+
if platform.system() == "Windows" and local_path.startswith("/"):
|
|
51
|
+
local_path = local_path[1:]
|
|
52
|
+
else:
|
|
53
|
+
# 如果没有 file:/// 前缀,则认为是普通路径
|
|
54
|
+
local_path = file_path
|
|
55
|
+
except Exception:
|
|
56
|
+
# 解析失败时,按原路径处理
|
|
57
|
+
local_path = file_path
|
|
58
|
+
|
|
59
|
+
async with self.lock:
|
|
60
|
+
await self._cleanup_expired_tokens()
|
|
61
|
+
|
|
62
|
+
if not os.path.exists(local_path):
|
|
63
|
+
raise FileNotFoundError(
|
|
64
|
+
f"文件不存在: {local_path} (原始输入: {file_path})",
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
file_token = str(uuid.uuid4())
|
|
68
|
+
expire_time = time.time() + (
|
|
69
|
+
timeout if timeout is not None else self.default_timeout
|
|
70
|
+
)
|
|
71
|
+
# 存储转换后的真实路径
|
|
72
|
+
self.staged_files[file_token] = (local_path, expire_time)
|
|
73
|
+
return file_token
|
|
74
|
+
|
|
75
|
+
async def handle_file(self, file_token: str) -> str:
|
|
76
|
+
"""根据令牌获取文件路径,使用后令牌失效。
|
|
77
|
+
|
|
78
|
+
Args:
|
|
79
|
+
file_token(str): 注册时返回的令牌
|
|
80
|
+
|
|
81
|
+
Returns:
|
|
82
|
+
str: 文件路径
|
|
83
|
+
|
|
84
|
+
Raises:
|
|
85
|
+
KeyError: 当令牌不存在或已过期时抛出
|
|
86
|
+
FileNotFoundError: 当文件本身已被删除时抛出
|
|
87
|
+
|
|
88
|
+
"""
|
|
89
|
+
async with self.lock:
|
|
90
|
+
await self._cleanup_expired_tokens()
|
|
91
|
+
|
|
92
|
+
if file_token not in self.staged_files:
|
|
93
|
+
raise KeyError(f"无效或过期的文件 token: {file_token}")
|
|
94
|
+
|
|
95
|
+
file_path, _ = self.staged_files.pop(file_token)
|
|
96
|
+
if not os.path.exists(file_path):
|
|
97
|
+
raise FileNotFoundError(f"文件不存在: {file_path}")
|
|
98
|
+
return file_path
|
astrbot/core/initial_loader.py
CHANGED
|
@@ -1,5 +1,4 @@
|
|
|
1
|
-
"""
|
|
2
|
-
AstrBot 启动器,负责初始化和启动核心组件和仪表板服务器。
|
|
1
|
+
"""AstrBot 启动器,负责初始化和启动核心组件和仪表板服务器。
|
|
3
2
|
|
|
4
3
|
工作流程:
|
|
5
4
|
1. 初始化核心生命周期, 传递数据库和日志代理实例到核心生命周期
|
|
@@ -8,10 +7,10 @@ AstrBot 启动器,负责初始化和启动核心组件和仪表板服务器。
|
|
|
8
7
|
|
|
9
8
|
import asyncio
|
|
10
9
|
import traceback
|
|
11
|
-
|
|
10
|
+
|
|
11
|
+
from astrbot.core import LogBroker, logger
|
|
12
12
|
from astrbot.core.core_lifecycle import AstrBotCoreLifecycle
|
|
13
13
|
from astrbot.core.db import BaseDatabase
|
|
14
|
-
from astrbot.core import LogBroker
|
|
15
14
|
from astrbot.dashboard.server import AstrBotDashboard
|
|
16
15
|
|
|
17
16
|
|
|
@@ -22,25 +21,35 @@ class InitialLoader:
|
|
|
22
21
|
self.db = db
|
|
23
22
|
self.logger = logger
|
|
24
23
|
self.log_broker = log_broker
|
|
24
|
+
self.webui_dir: str | None = None
|
|
25
25
|
|
|
26
26
|
async def start(self):
|
|
27
27
|
core_lifecycle = AstrBotCoreLifecycle(self.log_broker, self.db)
|
|
28
28
|
|
|
29
|
-
core_task = []
|
|
30
29
|
try:
|
|
31
30
|
await core_lifecycle.initialize()
|
|
32
|
-
core_task = core_lifecycle.start()
|
|
33
31
|
except Exception as e:
|
|
34
32
|
logger.critical(traceback.format_exc())
|
|
35
33
|
logger.critical(f"😭 初始化 AstrBot 失败:{e} !!!")
|
|
34
|
+
return
|
|
35
|
+
|
|
36
|
+
core_task = core_lifecycle.start()
|
|
37
|
+
|
|
38
|
+
webui_dir = self.webui_dir
|
|
36
39
|
|
|
37
40
|
self.dashboard_server = AstrBotDashboard(
|
|
38
|
-
core_lifecycle,
|
|
41
|
+
core_lifecycle,
|
|
42
|
+
self.db,
|
|
43
|
+
core_lifecycle.dashboard_shutdown_event,
|
|
44
|
+
webui_dir,
|
|
39
45
|
)
|
|
40
|
-
task = asyncio.gather(
|
|
41
|
-
core_task, self.dashboard_server.run()
|
|
42
|
-
) # 启动核心任务和仪表板服务器
|
|
43
46
|
|
|
47
|
+
coro = self.dashboard_server.run()
|
|
48
|
+
if coro:
|
|
49
|
+
# 启动核心任务和仪表板服务器
|
|
50
|
+
task = asyncio.gather(core_task, coro)
|
|
51
|
+
else:
|
|
52
|
+
task = core_task
|
|
44
53
|
try:
|
|
45
54
|
await task # 整个AstrBot在这里运行
|
|
46
55
|
except asyncio.CancelledError:
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
"""文档分块器基类
|
|
2
|
+
|
|
3
|
+
定义了文档分块处理的抽象接口。
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from abc import ABC, abstractmethod
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class BaseChunker(ABC):
    """Abstract base class for document chunkers.

    Every chunker should inherit from this class and implement ``chunk``.
    """

    @abstractmethod
    async def chunk(self, text: str, **kwargs) -> list[str]:
        """Split a text into chunks.

        Args:
            text: Input text.
            **kwargs: Options defined by the concrete chunker.

        Returns:
            list[str]: The chunks the text was split into.

        """
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
"""固定大小分块器
|
|
2
|
+
|
|
3
|
+
按照固定的字符数将文本分块,支持重叠区域。
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
from .base import BaseChunker
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class FixedSizeChunker(BaseChunker):
    """Fixed-size chunker.

    Splits text into chunks of a fixed number of characters, with optional
    overlap between consecutive chunks.
    """

    def __init__(self, chunk_size: int = 512, chunk_overlap: int = 50):
        """Initialize the chunker.

        Args:
            chunk_size: Size of a chunk, in characters.
            chunk_overlap: Number of characters shared by consecutive chunks.

        """
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap

    async def chunk(self, text: str, **kwargs) -> list[str]:
        """Split ``text`` into fixed-size chunks.

        Args:
            text: Input text.
            chunk_size: Maximum size of each chunk (keyword override of the
                instance setting).
            chunk_overlap: Size of the overlap between consecutive chunks
                (keyword override of the instance setting).

        Returns:
            list[str]: The chunks; empty when ``text`` is empty.

        Raises:
            ValueError: When there is text to split and ``chunk_size`` is not
                positive (the window could never advance).

        """
        chunk_size = kwargs.get("chunk_size", self.chunk_size)
        chunk_overlap = kwargs.get("chunk_overlap", self.chunk_overlap)

        if not text:
            return []
        if chunk_size <= 0:
            raise ValueError(f"chunk_size must be a positive integer, got {chunk_size}")

        # The window advances by chunk_size minus the overlap. An overlap that
        # is not strictly between 0 and chunk_size would stall or reverse the
        # window, so it is ignored and chunks are laid end to end.
        if 0 < chunk_overlap < chunk_size:
            step = chunk_size - chunk_overlap
        else:
            step = chunk_size

        # NOTE: a final chunk is emitted even when the remaining text lies
        # entirely inside the previous chunk's overlap region.
        return [text[start : start + chunk_size] for start in range(0, len(text), step)]
|