AstrBot 4.5.0__py3-none-any.whl → 4.5.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (244) hide show
  1. astrbot/api/__init__.py +10 -11
  2. astrbot/api/event/__init__.py +5 -6
  3. astrbot/api/event/filter/__init__.py +37 -36
  4. astrbot/api/platform/__init__.py +7 -8
  5. astrbot/api/provider/__init__.py +7 -7
  6. astrbot/api/star/__init__.py +3 -4
  7. astrbot/api/util/__init__.py +2 -2
  8. astrbot/cli/__main__.py +5 -5
  9. astrbot/cli/commands/__init__.py +3 -3
  10. astrbot/cli/commands/cmd_conf.py +19 -16
  11. astrbot/cli/commands/cmd_init.py +3 -2
  12. astrbot/cli/commands/cmd_plug.py +8 -10
  13. astrbot/cli/commands/cmd_run.py +5 -6
  14. astrbot/cli/utils/__init__.py +6 -6
  15. astrbot/cli/utils/basic.py +14 -14
  16. astrbot/cli/utils/plugin.py +24 -15
  17. astrbot/cli/utils/version_comparator.py +10 -12
  18. astrbot/core/__init__.py +8 -6
  19. astrbot/core/agent/agent.py +3 -2
  20. astrbot/core/agent/handoff.py +6 -2
  21. astrbot/core/agent/hooks.py +9 -6
  22. astrbot/core/agent/mcp_client.py +50 -15
  23. astrbot/core/agent/message.py +168 -0
  24. astrbot/core/agent/response.py +2 -1
  25. astrbot/core/agent/run_context.py +2 -3
  26. astrbot/core/agent/runners/base.py +10 -13
  27. astrbot/core/agent/runners/tool_loop_agent_runner.py +52 -51
  28. astrbot/core/agent/tool.py +60 -41
  29. astrbot/core/agent/tool_executor.py +9 -3
  30. astrbot/core/astr_agent_context.py +3 -1
  31. astrbot/core/astrbot_config_mgr.py +29 -9
  32. astrbot/core/config/__init__.py +2 -2
  33. astrbot/core/config/astrbot_config.py +28 -26
  34. astrbot/core/config/default.py +44 -6
  35. astrbot/core/conversation_mgr.py +105 -36
  36. astrbot/core/core_lifecycle.py +68 -54
  37. astrbot/core/db/__init__.py +33 -18
  38. astrbot/core/db/migration/helper.py +18 -13
  39. astrbot/core/db/migration/migra_3_to_4.py +53 -34
  40. astrbot/core/db/migration/migra_45_to_46.py +1 -1
  41. astrbot/core/db/migration/shared_preferences_v3.py +2 -1
  42. astrbot/core/db/migration/sqlite_v3.py +26 -23
  43. astrbot/core/db/po.py +27 -18
  44. astrbot/core/db/sqlite.py +74 -45
  45. astrbot/core/db/vec_db/base.py +10 -14
  46. astrbot/core/db/vec_db/faiss_impl/document_storage.py +90 -77
  47. astrbot/core/db/vec_db/faiss_impl/embedding_storage.py +9 -3
  48. astrbot/core/db/vec_db/faiss_impl/vec_db.py +36 -31
  49. astrbot/core/event_bus.py +8 -6
  50. astrbot/core/file_token_service.py +6 -5
  51. astrbot/core/initial_loader.py +7 -5
  52. astrbot/core/knowledge_base/chunking/__init__.py +1 -3
  53. astrbot/core/knowledge_base/chunking/base.py +1 -0
  54. astrbot/core/knowledge_base/chunking/fixed_size.py +2 -0
  55. astrbot/core/knowledge_base/chunking/recursive.py +16 -10
  56. astrbot/core/knowledge_base/kb_db_sqlite.py +50 -48
  57. astrbot/core/knowledge_base/kb_helper.py +30 -17
  58. astrbot/core/knowledge_base/kb_mgr.py +6 -7
  59. astrbot/core/knowledge_base/models.py +10 -4
  60. astrbot/core/knowledge_base/parsers/__init__.py +3 -5
  61. astrbot/core/knowledge_base/parsers/base.py +1 -0
  62. astrbot/core/knowledge_base/parsers/markitdown_parser.py +2 -1
  63. astrbot/core/knowledge_base/parsers/pdf_parser.py +2 -1
  64. astrbot/core/knowledge_base/parsers/text_parser.py +1 -0
  65. astrbot/core/knowledge_base/parsers/util.py +1 -1
  66. astrbot/core/knowledge_base/retrieval/__init__.py +6 -8
  67. astrbot/core/knowledge_base/retrieval/manager.py +17 -14
  68. astrbot/core/knowledge_base/retrieval/rank_fusion.py +7 -3
  69. astrbot/core/knowledge_base/retrieval/sparse_retriever.py +11 -5
  70. astrbot/core/log.py +21 -13
  71. astrbot/core/message/components.py +123 -217
  72. astrbot/core/message/message_event_result.py +24 -24
  73. astrbot/core/persona_mgr.py +20 -11
  74. astrbot/core/pipeline/__init__.py +7 -7
  75. astrbot/core/pipeline/content_safety_check/stage.py +13 -9
  76. astrbot/core/pipeline/content_safety_check/strategies/__init__.py +1 -2
  77. astrbot/core/pipeline/content_safety_check/strategies/baidu_aip.py +12 -13
  78. astrbot/core/pipeline/content_safety_check/strategies/keywords.py +1 -0
  79. astrbot/core/pipeline/content_safety_check/strategies/strategy.py +6 -6
  80. astrbot/core/pipeline/context.py +4 -1
  81. astrbot/core/pipeline/context_utils.py +77 -7
  82. astrbot/core/pipeline/preprocess_stage/stage.py +12 -9
  83. astrbot/core/pipeline/process_stage/method/llm_request.py +125 -72
  84. astrbot/core/pipeline/process_stage/method/star_request.py +19 -17
  85. astrbot/core/pipeline/process_stage/stage.py +13 -10
  86. astrbot/core/pipeline/process_stage/utils.py +6 -5
  87. astrbot/core/pipeline/rate_limit_check/stage.py +37 -36
  88. astrbot/core/pipeline/respond/stage.py +23 -20
  89. astrbot/core/pipeline/result_decorate/stage.py +31 -23
  90. astrbot/core/pipeline/scheduler.py +12 -8
  91. astrbot/core/pipeline/session_status_check/stage.py +12 -8
  92. astrbot/core/pipeline/stage.py +10 -4
  93. astrbot/core/pipeline/waking_check/stage.py +24 -18
  94. astrbot/core/pipeline/whitelist_check/stage.py +10 -7
  95. astrbot/core/platform/__init__.py +6 -6
  96. astrbot/core/platform/astr_message_event.py +76 -110
  97. astrbot/core/platform/astrbot_message.py +11 -13
  98. astrbot/core/platform/manager.py +16 -15
  99. astrbot/core/platform/message_session.py +5 -3
  100. astrbot/core/platform/platform.py +16 -24
  101. astrbot/core/platform/platform_metadata.py +4 -4
  102. astrbot/core/platform/register.py +8 -8
  103. astrbot/core/platform/sources/aiocqhttp/aiocqhttp_message_event.py +23 -15
  104. astrbot/core/platform/sources/aiocqhttp/aiocqhttp_platform_adapter.py +51 -33
  105. astrbot/core/platform/sources/dingtalk/dingtalk_adapter.py +47 -29
  106. astrbot/core/platform/sources/dingtalk/dingtalk_event.py +7 -3
  107. astrbot/core/platform/sources/discord/client.py +9 -6
  108. astrbot/core/platform/sources/discord/components.py +18 -14
  109. astrbot/core/platform/sources/discord/discord_platform_adapter.py +45 -30
  110. astrbot/core/platform/sources/discord/discord_platform_event.py +38 -30
  111. astrbot/core/platform/sources/lark/lark_adapter.py +23 -17
  112. astrbot/core/platform/sources/lark/lark_event.py +21 -14
  113. astrbot/core/platform/sources/misskey/misskey_adapter.py +107 -67
  114. astrbot/core/platform/sources/misskey/misskey_api.py +153 -129
  115. astrbot/core/platform/sources/misskey/misskey_event.py +20 -15
  116. astrbot/core/platform/sources/misskey/misskey_utils.py +74 -62
  117. astrbot/core/platform/sources/qqofficial/qqofficial_message_event.py +63 -44
  118. astrbot/core/platform/sources/qqofficial/qqofficial_platform_adapter.py +41 -26
  119. astrbot/core/platform/sources/qqofficial_webhook/qo_webhook_adapter.py +36 -17
  120. astrbot/core/platform/sources/qqofficial_webhook/qo_webhook_event.py +3 -1
  121. astrbot/core/platform/sources/qqofficial_webhook/qo_webhook_server.py +12 -7
  122. astrbot/core/platform/sources/satori/satori_adapter.py +56 -38
  123. astrbot/core/platform/sources/satori/satori_event.py +34 -25
  124. astrbot/core/platform/sources/slack/client.py +11 -9
  125. astrbot/core/platform/sources/slack/slack_adapter.py +52 -36
  126. astrbot/core/platform/sources/slack/slack_event.py +34 -24
  127. astrbot/core/platform/sources/telegram/tg_adapter.py +38 -18
  128. astrbot/core/platform/sources/telegram/tg_event.py +32 -18
  129. astrbot/core/platform/sources/webchat/webchat_adapter.py +27 -17
  130. astrbot/core/platform/sources/webchat/webchat_event.py +14 -10
  131. astrbot/core/platform/sources/wechatpadpro/wechatpadpro_adapter.py +115 -120
  132. astrbot/core/platform/sources/wechatpadpro/wechatpadpro_message_event.py +9 -8
  133. astrbot/core/platform/sources/wechatpadpro/xml_data_parser.py +15 -16
  134. astrbot/core/platform/sources/wecom/wecom_adapter.py +35 -18
  135. astrbot/core/platform/sources/wecom/wecom_event.py +55 -48
  136. astrbot/core/platform/sources/wecom/wecom_kf.py +34 -44
  137. astrbot/core/platform/sources/wecom/wecom_kf_message.py +26 -10
  138. astrbot/core/platform/sources/wecom_ai_bot/WXBizJsonMsgCrypt.py +18 -10
  139. astrbot/core/platform/sources/wecom_ai_bot/__init__.py +3 -5
  140. astrbot/core/platform/sources/wecom_ai_bot/ierror.py +0 -1
  141. astrbot/core/platform/sources/wecom_ai_bot/wecomai_adapter.py +61 -37
  142. astrbot/core/platform/sources/wecom_ai_bot/wecomai_api.py +67 -28
  143. astrbot/core/platform/sources/wecom_ai_bot/wecomai_event.py +8 -9
  144. astrbot/core/platform/sources/wecom_ai_bot/wecomai_queue_mgr.py +18 -9
  145. astrbot/core/platform/sources/wecom_ai_bot/wecomai_server.py +14 -12
  146. astrbot/core/platform/sources/wecom_ai_bot/wecomai_utils.py +22 -12
  147. astrbot/core/platform/sources/weixin_official_account/weixin_offacc_adapter.py +40 -26
  148. astrbot/core/platform/sources/weixin_official_account/weixin_offacc_event.py +47 -45
  149. astrbot/core/platform_message_history_mgr.py +5 -3
  150. astrbot/core/provider/__init__.py +2 -3
  151. astrbot/core/provider/entites.py +8 -8
  152. astrbot/core/provider/entities.py +61 -75
  153. astrbot/core/provider/func_tool_manager.py +59 -55
  154. astrbot/core/provider/manager.py +40 -22
  155. astrbot/core/provider/provider.py +72 -46
  156. astrbot/core/provider/register.py +7 -7
  157. astrbot/core/provider/sources/anthropic_source.py +48 -30
  158. astrbot/core/provider/sources/azure_tts_source.py +17 -13
  159. astrbot/core/provider/sources/coze_api_client.py +27 -17
  160. astrbot/core/provider/sources/coze_source.py +104 -87
  161. astrbot/core/provider/sources/dashscope_source.py +18 -11
  162. astrbot/core/provider/sources/dashscope_tts.py +36 -23
  163. astrbot/core/provider/sources/dify_source.py +25 -20
  164. astrbot/core/provider/sources/edge_tts_source.py +21 -17
  165. astrbot/core/provider/sources/fishaudio_tts_api_source.py +22 -14
  166. astrbot/core/provider/sources/gemini_embedding_source.py +12 -13
  167. astrbot/core/provider/sources/gemini_source.py +72 -58
  168. astrbot/core/provider/sources/gemini_tts_source.py +8 -6
  169. astrbot/core/provider/sources/gsv_selfhosted_source.py +17 -14
  170. astrbot/core/provider/sources/gsvi_tts_source.py +11 -7
  171. astrbot/core/provider/sources/minimax_tts_api_source.py +50 -40
  172. astrbot/core/provider/sources/openai_embedding_source.py +6 -8
  173. astrbot/core/provider/sources/openai_source.py +102 -69
  174. astrbot/core/provider/sources/openai_tts_api_source.py +14 -6
  175. astrbot/core/provider/sources/sensevoice_selfhosted_source.py +13 -11
  176. astrbot/core/provider/sources/vllm_rerank_source.py +10 -4
  177. astrbot/core/provider/sources/volcengine_tts.py +38 -31
  178. astrbot/core/provider/sources/whisper_api_source.py +14 -12
  179. astrbot/core/provider/sources/whisper_selfhosted_source.py +15 -11
  180. astrbot/core/provider/sources/xinference_rerank_source.py +116 -0
  181. astrbot/core/provider/sources/xinference_stt_provider.py +197 -0
  182. astrbot/core/star/__init__.py +16 -11
  183. astrbot/core/star/config.py +10 -15
  184. astrbot/core/star/context.py +109 -84
  185. astrbot/core/star/filter/__init__.py +4 -3
  186. astrbot/core/star/filter/command.py +30 -28
  187. astrbot/core/star/filter/command_group.py +27 -24
  188. astrbot/core/star/filter/custom_filter.py +6 -5
  189. astrbot/core/star/filter/event_message_type.py +4 -2
  190. astrbot/core/star/filter/permission.py +4 -2
  191. astrbot/core/star/filter/platform_adapter_type.py +4 -2
  192. astrbot/core/star/filter/regex.py +4 -2
  193. astrbot/core/star/register/__init__.py +19 -19
  194. astrbot/core/star/register/star.py +6 -2
  195. astrbot/core/star/register/star_handler.py +96 -73
  196. astrbot/core/star/session_llm_manager.py +48 -14
  197. astrbot/core/star/session_plugin_manager.py +29 -15
  198. astrbot/core/star/star.py +1 -2
  199. astrbot/core/star/star_handler.py +13 -8
  200. astrbot/core/star/star_manager.py +151 -59
  201. astrbot/core/star/star_tools.py +44 -37
  202. astrbot/core/star/updator.py +10 -10
  203. astrbot/core/umop_config_router.py +10 -4
  204. astrbot/core/updator.py +13 -5
  205. astrbot/core/utils/astrbot_path.py +3 -5
  206. astrbot/core/utils/dify_api_client.py +33 -15
  207. astrbot/core/utils/io.py +66 -42
  208. astrbot/core/utils/log_pipe.py +1 -1
  209. astrbot/core/utils/metrics.py +7 -7
  210. astrbot/core/utils/path_util.py +15 -16
  211. astrbot/core/utils/pip_installer.py +5 -5
  212. astrbot/core/utils/session_waiter.py +19 -20
  213. astrbot/core/utils/shared_preferences.py +45 -20
  214. astrbot/core/utils/t2i/__init__.py +4 -1
  215. astrbot/core/utils/t2i/network_strategy.py +35 -26
  216. astrbot/core/utils/t2i/renderer.py +11 -5
  217. astrbot/core/utils/t2i/template_manager.py +14 -15
  218. astrbot/core/utils/tencent_record_helper.py +19 -13
  219. astrbot/core/utils/version_comparator.py +10 -13
  220. astrbot/core/zip_updator.py +43 -40
  221. astrbot/dashboard/routes/__init__.py +18 -18
  222. astrbot/dashboard/routes/auth.py +10 -8
  223. astrbot/dashboard/routes/chat.py +30 -21
  224. astrbot/dashboard/routes/config.py +92 -75
  225. astrbot/dashboard/routes/conversation.py +46 -39
  226. astrbot/dashboard/routes/file.py +4 -2
  227. astrbot/dashboard/routes/knowledge_base.py +47 -40
  228. astrbot/dashboard/routes/log.py +9 -4
  229. astrbot/dashboard/routes/persona.py +19 -16
  230. astrbot/dashboard/routes/plugin.py +69 -55
  231. astrbot/dashboard/routes/route.py +3 -1
  232. astrbot/dashboard/routes/session_management.py +130 -116
  233. astrbot/dashboard/routes/stat.py +34 -34
  234. astrbot/dashboard/routes/t2i.py +15 -12
  235. astrbot/dashboard/routes/tools.py +47 -52
  236. astrbot/dashboard/routes/update.py +32 -28
  237. astrbot/dashboard/server.py +30 -26
  238. astrbot/dashboard/utils.py +8 -4
  239. {astrbot-4.5.0.dist-info → astrbot-4.5.2.dist-info}/METADATA +4 -2
  240. astrbot-4.5.2.dist-info/RECORD +261 -0
  241. astrbot-4.5.0.dist-info/RECORD +0 -258
  242. {astrbot-4.5.0.dist-info → astrbot-4.5.2.dist-info}/WHEEL +0 -0
  243. {astrbot-4.5.0.dist-info → astrbot-4.5.2.dist-info}/entry_points.txt +0 -0
  244. {astrbot-4.5.0.dist-info → astrbot-4.5.2.dist-info}/licenses/LICENSE +0 -0
@@ -18,6 +18,7 @@ class FixedSizeChunker(BaseChunker):
18
18
  Args:
19
19
  chunk_size: 块的大小(字符数)
20
20
  chunk_overlap: 块之间的重叠字符数
21
+
21
22
  """
22
23
  self.chunk_size = chunk_size
23
24
  self.chunk_overlap = chunk_overlap
@@ -32,6 +33,7 @@ class FixedSizeChunker(BaseChunker):
32
33
 
33
34
  Returns:
34
35
  list[str]: 分块后的文本列表
36
+
35
37
  """
36
38
  chunk_size = kwargs.get("chunk_size", self.chunk_size)
37
39
  chunk_overlap = kwargs.get("chunk_overlap", self.chunk_overlap)
@@ -1,4 +1,5 @@
1
1
  from collections.abc import Callable
2
+
2
3
  from .base import BaseChunker
3
4
 
4
5
 
@@ -11,8 +12,7 @@ class RecursiveCharacterChunker(BaseChunker):
11
12
  is_separator_regex: bool = False,
12
13
  separators: list[str] | None = None,
13
14
  ):
14
- """
15
- 初始化递归字符文本分割器
15
+ """初始化递归字符文本分割器
16
16
 
17
17
  Args:
18
18
  chunk_size: 每个文本块的最大大小
@@ -20,6 +20,7 @@ class RecursiveCharacterChunker(BaseChunker):
20
20
  length_function: 计算文本长度的函数
21
21
  is_separator_regex: 分隔符是否为正则表达式
22
22
  separators: 用于分割文本的分隔符列表,按优先级排序
23
+
23
24
  """
24
25
  self.chunk_size = chunk_size
25
26
  self.chunk_overlap = chunk_overlap
@@ -39,8 +40,7 @@ class RecursiveCharacterChunker(BaseChunker):
39
40
  ]
40
41
 
41
42
  async def chunk(self, text: str, **kwargs) -> list[str]:
42
- """
43
- 递归地将文本分割成块
43
+ """递归地将文本分割成块
44
44
 
45
45
  Args:
46
46
  text: 要分割的文本
@@ -49,6 +49,7 @@ class RecursiveCharacterChunker(BaseChunker):
49
49
 
50
50
  Returns:
51
51
  分割后的文本块列表
52
+
52
53
  """
53
54
  if not text:
54
55
  return []
@@ -90,7 +91,7 @@ class RecursiveCharacterChunker(BaseChunker):
90
91
  combined_text,
91
92
  chunk_size=chunk_size,
92
93
  chunk_overlap=overlap,
93
- )
94
+ ),
94
95
  )
95
96
  current_chunk = []
96
97
  current_chunk_length = 0
@@ -98,8 +99,10 @@ class RecursiveCharacterChunker(BaseChunker):
98
99
  # 递归分割过大的部分
99
100
  final_chunks.extend(
100
101
  await self.chunk(
101
- split, chunk_size=chunk_size, chunk_overlap=overlap
102
- )
102
+ split,
103
+ chunk_size=chunk_size,
104
+ chunk_overlap=overlap,
105
+ ),
103
106
  )
104
107
  # 如果添加这部分会使当前块超过chunk_size
105
108
  elif current_chunk_length + split_length > chunk_size:
@@ -132,16 +135,19 @@ class RecursiveCharacterChunker(BaseChunker):
132
135
  return [text]
133
136
 
134
137
  def _split_by_character(
135
- self, text: str, chunk_size: int | None = None, overlap: int | None = None
138
+ self,
139
+ text: str,
140
+ chunk_size: int | None = None,
141
+ overlap: int | None = None,
136
142
  ) -> list[str]:
137
- """
138
- 按字符级别分割文本
143
+ """按字符级别分割文本
139
144
 
140
145
  Args:
141
146
  text: 要分割的文本
142
147
 
143
148
  Returns:
144
149
  分割后的文本块列表
150
+
145
151
  """
146
152
  chunk_size = chunk_size or self.chunk_size
147
153
  overlap = overlap or self.chunk_overlap
@@ -1,18 +1,18 @@
1
1
  from contextlib import asynccontextmanager
2
2
  from pathlib import Path
3
3
 
4
- from sqlmodel import col, desc
5
- from sqlalchemy import text, func, select, update, delete
4
+ from sqlalchemy import delete, func, select, text, update
6
5
  from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
6
+ from sqlmodel import col, desc
7
7
 
8
8
  from astrbot.core import logger
9
+ from astrbot.core.db.vec_db.faiss_impl import FaissVecDB
9
10
  from astrbot.core.knowledge_base.models import (
10
11
  BaseKBModel,
11
12
  KBDocument,
12
13
  KBMedia,
13
14
  KnowledgeBase,
14
15
  )
15
- from astrbot.core.db.vec_db.faiss_impl import FaissVecDB
16
16
 
17
17
 
18
18
  class KBSQLiteDatabase:
@@ -21,6 +21,7 @@ class KBSQLiteDatabase:
21
21
 
22
22
  Args:
23
23
  db_path: 数据库文件路径, 默认为 data/knowledge_base/kb.db
24
+
24
25
  """
25
26
  self.db_path = db_path
26
27
  self.DATABASE_URL = f"sqlite+aiosqlite:///{db_path}"
@@ -85,77 +86,77 @@ class KBSQLiteDatabase:
85
86
  await session.execute(
86
87
  text(
87
88
  "CREATE INDEX IF NOT EXISTS idx_kb_kb_id "
88
- "ON knowledge_bases(kb_id)"
89
- )
89
+ "ON knowledge_bases(kb_id)",
90
+ ),
90
91
  )
91
92
  await session.execute(
92
93
  text(
93
94
  "CREATE INDEX IF NOT EXISTS idx_kb_name "
94
- "ON knowledge_bases(kb_name)"
95
- )
95
+ "ON knowledge_bases(kb_name)",
96
+ ),
96
97
  )
97
98
  await session.execute(
98
99
  text(
99
100
  "CREATE INDEX IF NOT EXISTS idx_kb_created_at "
100
- "ON knowledge_bases(created_at)"
101
- )
101
+ "ON knowledge_bases(created_at)",
102
+ ),
102
103
  )
103
104
 
104
105
  # 创建文档表索引
105
106
  await session.execute(
106
107
  text(
107
108
  "CREATE INDEX IF NOT EXISTS idx_doc_doc_id "
108
- "ON kb_documents(doc_id)"
109
- )
109
+ "ON kb_documents(doc_id)",
110
+ ),
110
111
  )
111
112
  await session.execute(
112
113
  text(
113
114
  "CREATE INDEX IF NOT EXISTS idx_doc_kb_id "
114
- "ON kb_documents(kb_id)"
115
- )
115
+ "ON kb_documents(kb_id)",
116
+ ),
116
117
  )
117
118
  await session.execute(
118
119
  text(
119
120
  "CREATE INDEX IF NOT EXISTS idx_doc_name "
120
- "ON kb_documents(doc_name)"
121
- )
121
+ "ON kb_documents(doc_name)",
122
+ ),
122
123
  )
123
124
  await session.execute(
124
125
  text(
125
126
  "CREATE INDEX IF NOT EXISTS idx_doc_type "
126
- "ON kb_documents(file_type)"
127
- )
127
+ "ON kb_documents(file_type)",
128
+ ),
128
129
  )
129
130
  await session.execute(
130
131
  text(
131
132
  "CREATE INDEX IF NOT EXISTS idx_doc_created_at "
132
- "ON kb_documents(created_at)"
133
- )
133
+ "ON kb_documents(created_at)",
134
+ ),
134
135
  )
135
136
 
136
137
  # 创建多媒体表索引
137
138
  await session.execute(
138
139
  text(
139
140
  "CREATE INDEX IF NOT EXISTS idx_media_media_id "
140
- "ON kb_media(media_id)"
141
- )
141
+ "ON kb_media(media_id)",
142
+ ),
142
143
  )
143
144
  await session.execute(
144
145
  text(
145
146
  "CREATE INDEX IF NOT EXISTS idx_media_doc_id "
146
- "ON kb_media(doc_id)"
147
- )
147
+ "ON kb_media(doc_id)",
148
+ ),
148
149
  )
149
150
  await session.execute(
150
151
  text(
151
- "CREATE INDEX IF NOT EXISTS idx_media_kb_id ON kb_media(kb_id)"
152
- )
152
+ "CREATE INDEX IF NOT EXISTS idx_media_kb_id ON kb_media(kb_id)",
153
+ ),
153
154
  )
154
155
  await session.execute(
155
156
  text(
156
157
  "CREATE INDEX IF NOT EXISTS idx_media_type "
157
- "ON kb_media(media_type)"
158
- )
158
+ "ON kb_media(media_type)",
159
+ ),
159
160
  )
160
161
 
161
162
  await session.commit()
@@ -208,7 +209,10 @@ class KBSQLiteDatabase:
208
209
  return result.scalar_one_or_none()
209
210
 
210
211
  async def list_documents_by_kb(
211
- self, kb_id: str, offset: int = 0, limit: int = 100
212
+ self,
213
+ kb_id: str,
214
+ offset: int = 0,
215
+ limit: int = 100,
212
216
  ) -> list[KBDocument]:
213
217
  """列出知识库的所有文档"""
214
218
  async with self.get_db() as session:
@@ -226,7 +230,7 @@ class KBSQLiteDatabase:
226
230
  """统计知识库的文档数量"""
227
231
  async with self.get_db() as session:
228
232
  stmt = select(func.count(col(KBDocument.id))).where(
229
- col(KBDocument.kb_id) == kb_id
233
+ col(KBDocument.kb_id) == kb_id,
230
234
  )
231
235
  result = await session.execute(stmt)
232
236
  return result.scalar() or 0
@@ -252,12 +256,11 @@ class KBSQLiteDatabase:
252
256
  async def delete_document_by_id(self, doc_id: str, vec_db: FaissVecDB):
253
257
  """删除单个文档及其相关数据"""
254
258
  # 在知识库表中删除
255
- async with self.get_db() as session:
256
- async with session.begin():
257
- # 删除文档记录
258
- delete_stmt = delete(KBDocument).where(col(KBDocument.doc_id) == doc_id)
259
- await session.execute(delete_stmt)
260
- await session.commit()
259
+ async with self.get_db() as session, session.begin():
260
+ # 删除文档记录
261
+ delete_stmt = delete(KBDocument).where(col(KBDocument.doc_id) == doc_id)
262
+ await session.execute(delete_stmt)
263
+ await session.commit()
261
264
 
262
265
  # 在 vec db 中删除相关向量
263
266
  await vec_db.delete_documents(metadata_filters={"kb_doc_id": doc_id})
@@ -282,18 +285,17 @@ class KBSQLiteDatabase:
282
285
  """更新知识库统计信息"""
283
286
  chunk_cnt = await vec_db.count_documents()
284
287
 
285
- async with self.get_db() as session:
286
- async with session.begin():
287
- update_stmt = (
288
- update(KnowledgeBase)
289
- .where(col(KnowledgeBase.kb_id) == kb_id)
290
- .values(
291
- doc_count=select(func.count(col(KBDocument.id)))
292
- .where(col(KBDocument.kb_id) == kb_id)
293
- .scalar_subquery(),
294
- chunk_count=chunk_cnt,
295
- )
288
+ async with self.get_db() as session, session.begin():
289
+ update_stmt = (
290
+ update(KnowledgeBase)
291
+ .where(col(KnowledgeBase.kb_id) == kb_id)
292
+ .values(
293
+ doc_count=select(func.count(col(KBDocument.id)))
294
+ .where(col(KBDocument.kb_id) == kb_id)
295
+ .scalar_subquery(),
296
+ chunk_count=chunk_cnt,
296
297
  )
298
+ )
297
299
 
298
- await session.execute(update_stmt)
299
- await session.commit()
300
+ await session.execute(update_stmt)
301
+ await session.commit()
@@ -1,16 +1,19 @@
1
- import uuid
2
- import aiofiles
3
1
  import json
2
+ import uuid
4
3
  from pathlib import Path
5
- from .models import KnowledgeBase, KBDocument, KBMedia
6
- from .kb_db_sqlite import KBSQLiteDatabase
4
+
5
+ import aiofiles
6
+
7
+ from astrbot.core import logger
7
8
  from astrbot.core.db.vec_db.base import BaseVecDB
8
9
  from astrbot.core.db.vec_db.faiss_impl.vec_db import FaissVecDB
9
- from astrbot.core.provider.provider import EmbeddingProvider, RerankProvider
10
10
  from astrbot.core.provider.manager import ProviderManager
11
- from .parsers.util import select_parser
11
+ from astrbot.core.provider.provider import EmbeddingProvider, RerankProvider
12
+
12
13
  from .chunking.base import BaseChunker
13
- from astrbot.core import logger
14
+ from .kb_db_sqlite import KBSQLiteDatabase
15
+ from .models import KBDocument, KBMedia, KnowledgeBase
16
+ from .parsers.util import select_parser
14
17
 
15
18
 
16
19
  class KBHelper:
@@ -45,11 +48,11 @@ class KBHelper:
45
48
  if not self.kb.embedding_provider_id:
46
49
  raise ValueError(f"知识库 {self.kb.kb_name} 未配置 Embedding Provider")
47
50
  ep: EmbeddingProvider = await self.prov_mgr.get_provider_by_id(
48
- self.kb.embedding_provider_id
51
+ self.kb.embedding_provider_id,
49
52
  ) # type: ignore
50
53
  if not ep:
51
54
  raise ValueError(
52
- f"无法找到 ID 为 {self.kb.embedding_provider_id} 的 Embedding Provider"
55
+ f"无法找到 ID 为 {self.kb.embedding_provider_id} 的 Embedding Provider",
53
56
  )
54
57
  return ep
55
58
 
@@ -57,11 +60,11 @@ class KBHelper:
57
60
  if not self.kb.rerank_provider_id:
58
61
  return None
59
62
  rp: RerankProvider = await self.prov_mgr.get_provider_by_id(
60
- self.kb.rerank_provider_id
63
+ self.kb.rerank_provider_id,
61
64
  ) # type: ignore
62
65
  if not rp:
63
66
  raise ValueError(
64
- f"无法找到 ID 为 {self.kb.rerank_provider_id} 的 Rerank Provider"
67
+ f"无法找到 ID 为 {self.kb.rerank_provider_id} 的 Rerank Provider",
65
68
  )
66
69
  return rp
67
70
 
@@ -122,6 +125,7 @@ class KBHelper:
122
125
  - stage: 当前阶段 ('parsing', 'chunking', 'embedding')
123
126
  - current: 当前进度
124
127
  - total: 总数
128
+
125
129
  """
126
130
  await self._ensure_vec_db()
127
131
  doc_id = str(uuid.uuid4())
@@ -162,7 +166,9 @@ class KBHelper:
162
166
  await progress_callback("chunking", 0, 100)
163
167
 
164
168
  chunks_text = await self.chunker.chunk(
165
- text_content, chunk_size=chunk_size, chunk_overlap=chunk_overlap
169
+ text_content,
170
+ chunk_size=chunk_size,
171
+ chunk_overlap=chunk_overlap,
166
172
  )
167
173
  contents = []
168
174
  metadatas = []
@@ -173,7 +179,7 @@ class KBHelper:
173
179
  "kb_id": self.kb.kb_id,
174
180
  "kb_doc_id": doc_id,
175
181
  "chunk_index": idx,
176
- }
182
+ },
177
183
  )
178
184
 
179
185
  if progress_callback:
@@ -234,7 +240,9 @@ class KBHelper:
234
240
  raise e
235
241
 
236
242
  async def list_documents(
237
- self, offset: int = 0, limit: int = 100
243
+ self,
244
+ offset: int = 0,
245
+ limit: int = 100,
238
246
  ) -> list[KBDocument]:
239
247
  """列出知识库的所有文档"""
240
248
  docs = await self.kb_db.list_documents_by_kb(self.kb.kb_id, offset, limit)
@@ -288,12 +296,17 @@ class KBHelper:
288
296
  await session.refresh(doc)
289
297
 
290
298
  async def get_chunks_by_doc_id(
291
- self, doc_id: str, offset: int = 0, limit: int = 100
299
+ self,
300
+ doc_id: str,
301
+ offset: int = 0,
302
+ limit: int = 100,
292
303
  ) -> list[dict]:
293
304
  """获取文档的所有块及其元数据"""
294
305
  vec_db: FaissVecDB = self.vec_db # type: ignore
295
306
  chunks = await vec_db.document_storage.get_documents(
296
- metadata_filters={"kb_doc_id": doc_id}, offset=offset, limit=limit
307
+ metadata_filters={"kb_doc_id": doc_id},
308
+ offset=offset,
309
+ limit=limit,
297
310
  )
298
311
  result = []
299
312
  for chunk in chunks:
@@ -306,7 +319,7 @@ class KBHelper:
306
319
  "chunk_index": chunk_md["chunk_index"],
307
320
  "content": chunk["text"],
308
321
  "char_count": len(chunk["text"]),
309
- }
322
+ },
310
323
  )
311
324
  return result
312
325
 
@@ -1,19 +1,17 @@
1
1
  import traceback
2
2
  from pathlib import Path
3
+
3
4
  from astrbot.core import logger
4
5
  from astrbot.core.provider.manager import ProviderManager
5
6
 
6
- from .retrieval.manager import RetrievalManager, RetrievalResult
7
- from .retrieval.sparse_retriever import SparseRetriever
8
- from .retrieval.rank_fusion import RankFusion
9
- from .kb_db_sqlite import KBSQLiteDatabase
10
-
11
7
  # from .chunking.fixed_size import FixedSizeChunker
12
8
  from .chunking.recursive import RecursiveCharacterChunker
9
+ from .kb_db_sqlite import KBSQLiteDatabase
13
10
  from .kb_helper import KBHelper
14
-
15
11
  from .models import KnowledgeBase
16
-
12
+ from .retrieval.manager import RetrievalManager, RetrievalResult
13
+ from .retrieval.rank_fusion import RankFusion
14
+ from .retrieval.sparse_retriever import SparseRetriever
17
15
 
18
16
  FILES_PATH = "data/knowledge_base"
19
17
  DB_PATH = Path(FILES_PATH) / "kb.db"
@@ -257,6 +255,7 @@ class KnowledgeBaseManager:
257
255
 
258
256
  Returns:
259
257
  str: 格式化的上下文文本
258
+
260
259
  """
261
260
  lines = ["以下是相关的知识库内容,请参考这些信息回答用户的问题:\n"]
262
261
 
@@ -1,7 +1,7 @@
1
1
  import uuid
2
2
  from datetime import datetime, timezone
3
3
 
4
- from sqlmodel import Field, SQLModel, Text, UniqueConstraint, MetaData
4
+ from sqlmodel import Field, MetaData, SQLModel, Text, UniqueConstraint
5
5
 
6
6
 
7
7
  class BaseKBModel(SQLModel, table=False):
@@ -17,7 +17,9 @@ class KnowledgeBase(BaseKBModel, table=True):
17
17
  __tablename__ = "knowledge_bases" # type: ignore
18
18
 
19
19
  id: int | None = Field(
20
- primary_key=True, sa_column_kwargs={"autoincrement": True}, default=None
20
+ primary_key=True,
21
+ sa_column_kwargs={"autoincrement": True},
22
+ default=None,
21
23
  )
22
24
  kb_id: str = Field(
23
25
  max_length=36,
@@ -63,7 +65,9 @@ class KBDocument(BaseKBModel, table=True):
63
65
  __tablename__ = "kb_documents" # type: ignore
64
66
 
65
67
  id: int | None = Field(
66
- primary_key=True, sa_column_kwargs={"autoincrement": True}, default=None
68
+ primary_key=True,
69
+ sa_column_kwargs={"autoincrement": True},
70
+ default=None,
67
71
  )
68
72
  doc_id: str = Field(
69
73
  max_length=36,
@@ -95,7 +99,9 @@ class KBMedia(BaseKBModel, table=True):
95
99
  __tablename__ = "kb_media" # type: ignore
96
100
 
97
101
  id: int | None = Field(
98
- primary_key=True, sa_column_kwargs={"autoincrement": True}, default=None
102
+ primary_key=True,
103
+ sa_column_kwargs={"autoincrement": True},
104
+ default=None,
99
105
  )
100
106
  media_id: str = Field(
101
107
  max_length=36,
@@ -1,15 +1,13 @@
1
- """
2
- 文档解析器模块
3
- """
1
+ """文档解析器模块"""
4
2
 
5
3
  from .base import BaseParser, MediaItem, ParseResult
6
- from .text_parser import TextParser
7
4
  from .pdf_parser import PDFParser
5
+ from .text_parser import TextParser
8
6
 
9
7
  __all__ = [
10
8
  "BaseParser",
11
9
  "MediaItem",
10
+ "PDFParser",
12
11
  "ParseResult",
13
12
  "TextParser",
14
- "PDFParser",
15
13
  ]
@@ -47,4 +47,5 @@ class BaseParser(ABC):
47
47
 
48
48
  Returns:
49
49
  ParseResult: 解析结果
50
+
50
51
  """
@@ -1,11 +1,12 @@
1
1
  import io
2
2
  import os
3
3
 
4
+ from markitdown_no_magika import MarkItDown, StreamInfo
5
+
4
6
  from astrbot.core.knowledge_base.parsers.base import (
5
7
  BaseParser,
6
8
  ParseResult,
7
9
  )
8
- from markitdown_no_magika import MarkItDown, StreamInfo
9
10
 
10
11
 
11
12
  class MarkitdownParser(BaseParser):
@@ -29,6 +29,7 @@ class PDFParser(BaseParser):
29
29
 
30
30
  Returns:
31
31
  ParseResult: 包含文本和图片的解析结果
32
+
32
33
  """
33
34
  pdf_file = io.BytesIO(file_content)
34
35
  reader = PdfReader(pdf_file)
@@ -87,7 +88,7 @@ class PDFParser(BaseParser):
87
88
  file_name=f"page_{page_num}_img_{image_counter}.{ext}",
88
89
  content=image_data,
89
90
  mime_type=mime_type,
90
- )
91
+ ),
91
92
  )
92
93
  except Exception:
93
94
  # 单个图片提取失败不影响整体
@@ -26,6 +26,7 @@ class TextParser(BaseParser):
26
26
 
27
27
  Raises:
28
28
  ValueError: 如果无法解码文件
29
+
29
30
  """
30
31
  # 尝试多种编码
31
32
  for encoding in ["utf-8", "gbk", "gb2312", "gb18030"]:
@@ -6,7 +6,7 @@ async def select_parser(ext: str) -> BaseParser:
6
6
  from .markitdown_parser import MarkitdownParser
7
7
 
8
8
  return MarkitdownParser()
9
- elif ext == ".pdf":
9
+ if ext == ".pdf":
10
10
  from .pdf_parser import PDFParser
11
11
 
12
12
  return PDFParser()
@@ -1,16 +1,14 @@
1
- """
2
- 检索模块
3
- """
1
+ """检索模块"""
4
2
 
5
3
  from .manager import RetrievalManager, RetrievalResult
6
- from .sparse_retriever import SparseRetriever, SparseResult
7
- from .rank_fusion import RankFusion, FusedResult
4
+ from .rank_fusion import FusedResult, RankFusion
5
+ from .sparse_retriever import SparseResult, SparseRetriever
8
6
 
9
7
  __all__ = [
8
+ "FusedResult",
9
+ "RankFusion",
10
10
  "RetrievalManager",
11
11
  "RetrievalResult",
12
- "SparseRetriever",
13
12
  "SparseResult",
14
- "RankFusion",
15
- "FusedResult",
13
+ "SparseRetriever",
16
14
  ]