AstrBot 3.5.6__py3-none-any.whl → 4.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (288) hide show
  1. astrbot/api/__init__.py +16 -4
  2. astrbot/api/all.py +2 -1
  3. astrbot/api/event/__init__.py +5 -6
  4. astrbot/api/event/filter/__init__.py +37 -34
  5. astrbot/api/platform/__init__.py +7 -8
  6. astrbot/api/provider/__init__.py +8 -7
  7. astrbot/api/star/__init__.py +3 -4
  8. astrbot/api/util/__init__.py +2 -2
  9. astrbot/cli/__init__.py +1 -0
  10. astrbot/cli/__main__.py +18 -197
  11. astrbot/cli/commands/__init__.py +6 -0
  12. astrbot/cli/commands/cmd_conf.py +209 -0
  13. astrbot/cli/commands/cmd_init.py +56 -0
  14. astrbot/cli/commands/cmd_plug.py +245 -0
  15. astrbot/cli/commands/cmd_run.py +62 -0
  16. astrbot/cli/utils/__init__.py +18 -0
  17. astrbot/cli/utils/basic.py +76 -0
  18. astrbot/cli/utils/plugin.py +246 -0
  19. astrbot/cli/utils/version_comparator.py +90 -0
  20. astrbot/core/__init__.py +17 -19
  21. astrbot/core/agent/agent.py +14 -0
  22. astrbot/core/agent/handoff.py +38 -0
  23. astrbot/core/agent/hooks.py +30 -0
  24. astrbot/core/agent/mcp_client.py +385 -0
  25. astrbot/core/agent/message.py +175 -0
  26. astrbot/core/agent/response.py +14 -0
  27. astrbot/core/agent/run_context.py +22 -0
  28. astrbot/core/agent/runners/__init__.py +3 -0
  29. astrbot/core/agent/runners/base.py +65 -0
  30. astrbot/core/agent/runners/coze/coze_agent_runner.py +367 -0
  31. astrbot/core/agent/runners/coze/coze_api_client.py +324 -0
  32. astrbot/core/agent/runners/dashscope/dashscope_agent_runner.py +403 -0
  33. astrbot/core/agent/runners/dify/dify_agent_runner.py +336 -0
  34. astrbot/core/agent/runners/dify/dify_api_client.py +195 -0
  35. astrbot/core/agent/runners/tool_loop_agent_runner.py +400 -0
  36. astrbot/core/agent/tool.py +285 -0
  37. astrbot/core/agent/tool_executor.py +17 -0
  38. astrbot/core/astr_agent_context.py +19 -0
  39. astrbot/core/astr_agent_hooks.py +36 -0
  40. astrbot/core/astr_agent_run_util.py +80 -0
  41. astrbot/core/astr_agent_tool_exec.py +246 -0
  42. astrbot/core/astrbot_config_mgr.py +275 -0
  43. astrbot/core/config/__init__.py +2 -2
  44. astrbot/core/config/astrbot_config.py +60 -20
  45. astrbot/core/config/default.py +1972 -453
  46. astrbot/core/config/i18n_utils.py +110 -0
  47. astrbot/core/conversation_mgr.py +285 -75
  48. astrbot/core/core_lifecycle.py +167 -62
  49. astrbot/core/db/__init__.py +305 -102
  50. astrbot/core/db/migration/helper.py +69 -0
  51. astrbot/core/db/migration/migra_3_to_4.py +357 -0
  52. astrbot/core/db/migration/migra_45_to_46.py +44 -0
  53. astrbot/core/db/migration/migra_webchat_session.py +131 -0
  54. astrbot/core/db/migration/shared_preferences_v3.py +48 -0
  55. astrbot/core/db/migration/sqlite_v3.py +497 -0
  56. astrbot/core/db/po.py +259 -55
  57. astrbot/core/db/sqlite.py +773 -528
  58. astrbot/core/db/vec_db/base.py +73 -0
  59. astrbot/core/db/vec_db/faiss_impl/__init__.py +3 -0
  60. astrbot/core/db/vec_db/faiss_impl/document_storage.py +392 -0
  61. astrbot/core/db/vec_db/faiss_impl/embedding_storage.py +93 -0
  62. astrbot/core/db/vec_db/faiss_impl/sqlite_init.sql +17 -0
  63. astrbot/core/db/vec_db/faiss_impl/vec_db.py +204 -0
  64. astrbot/core/event_bus.py +26 -22
  65. astrbot/core/exceptions.py +9 -0
  66. astrbot/core/file_token_service.py +98 -0
  67. astrbot/core/initial_loader.py +19 -10
  68. astrbot/core/knowledge_base/chunking/__init__.py +9 -0
  69. astrbot/core/knowledge_base/chunking/base.py +25 -0
  70. astrbot/core/knowledge_base/chunking/fixed_size.py +59 -0
  71. astrbot/core/knowledge_base/chunking/recursive.py +161 -0
  72. astrbot/core/knowledge_base/kb_db_sqlite.py +301 -0
  73. astrbot/core/knowledge_base/kb_helper.py +642 -0
  74. astrbot/core/knowledge_base/kb_mgr.py +330 -0
  75. astrbot/core/knowledge_base/models.py +120 -0
  76. astrbot/core/knowledge_base/parsers/__init__.py +13 -0
  77. astrbot/core/knowledge_base/parsers/base.py +51 -0
  78. astrbot/core/knowledge_base/parsers/markitdown_parser.py +26 -0
  79. astrbot/core/knowledge_base/parsers/pdf_parser.py +101 -0
  80. astrbot/core/knowledge_base/parsers/text_parser.py +42 -0
  81. astrbot/core/knowledge_base/parsers/url_parser.py +103 -0
  82. astrbot/core/knowledge_base/parsers/util.py +13 -0
  83. astrbot/core/knowledge_base/prompts.py +65 -0
  84. astrbot/core/knowledge_base/retrieval/__init__.py +14 -0
  85. astrbot/core/knowledge_base/retrieval/hit_stopwords.txt +767 -0
  86. astrbot/core/knowledge_base/retrieval/manager.py +276 -0
  87. astrbot/core/knowledge_base/retrieval/rank_fusion.py +142 -0
  88. astrbot/core/knowledge_base/retrieval/sparse_retriever.py +136 -0
  89. astrbot/core/log.py +21 -15
  90. astrbot/core/message/components.py +413 -287
  91. astrbot/core/message/message_event_result.py +35 -24
  92. astrbot/core/persona_mgr.py +192 -0
  93. astrbot/core/pipeline/__init__.py +14 -14
  94. astrbot/core/pipeline/content_safety_check/stage.py +13 -9
  95. astrbot/core/pipeline/content_safety_check/strategies/__init__.py +1 -2
  96. astrbot/core/pipeline/content_safety_check/strategies/baidu_aip.py +13 -14
  97. astrbot/core/pipeline/content_safety_check/strategies/keywords.py +2 -1
  98. astrbot/core/pipeline/content_safety_check/strategies/strategy.py +6 -6
  99. astrbot/core/pipeline/context.py +7 -1
  100. astrbot/core/pipeline/context_utils.py +107 -0
  101. astrbot/core/pipeline/preprocess_stage/stage.py +63 -36
  102. astrbot/core/pipeline/process_stage/method/agent_request.py +48 -0
  103. astrbot/core/pipeline/process_stage/method/agent_sub_stages/internal.py +464 -0
  104. astrbot/core/pipeline/process_stage/method/agent_sub_stages/third_party.py +202 -0
  105. astrbot/core/pipeline/process_stage/method/star_request.py +26 -32
  106. astrbot/core/pipeline/process_stage/stage.py +21 -15
  107. astrbot/core/pipeline/process_stage/utils.py +125 -0
  108. astrbot/core/pipeline/rate_limit_check/stage.py +34 -36
  109. astrbot/core/pipeline/respond/stage.py +142 -101
  110. astrbot/core/pipeline/result_decorate/stage.py +124 -57
  111. astrbot/core/pipeline/scheduler.py +21 -16
  112. astrbot/core/pipeline/session_status_check/stage.py +37 -0
  113. astrbot/core/pipeline/stage.py +11 -76
  114. astrbot/core/pipeline/waking_check/stage.py +69 -33
  115. astrbot/core/pipeline/whitelist_check/stage.py +10 -7
  116. astrbot/core/platform/__init__.py +6 -6
  117. astrbot/core/platform/astr_message_event.py +107 -129
  118. astrbot/core/platform/astrbot_message.py +32 -12
  119. astrbot/core/platform/manager.py +62 -18
  120. astrbot/core/platform/message_session.py +30 -0
  121. astrbot/core/platform/platform.py +16 -24
  122. astrbot/core/platform/platform_metadata.py +9 -4
  123. astrbot/core/platform/register.py +12 -7
  124. astrbot/core/platform/sources/aiocqhttp/aiocqhttp_message_event.py +136 -60
  125. astrbot/core/platform/sources/aiocqhttp/aiocqhttp_platform_adapter.py +126 -46
  126. astrbot/core/platform/sources/dingtalk/dingtalk_adapter.py +63 -31
  127. astrbot/core/platform/sources/dingtalk/dingtalk_event.py +30 -26
  128. astrbot/core/platform/sources/discord/client.py +129 -0
  129. astrbot/core/platform/sources/discord/components.py +139 -0
  130. astrbot/core/platform/sources/discord/discord_platform_adapter.py +473 -0
  131. astrbot/core/platform/sources/discord/discord_platform_event.py +313 -0
  132. astrbot/core/platform/sources/lark/lark_adapter.py +27 -18
  133. astrbot/core/platform/sources/lark/lark_event.py +39 -13
  134. astrbot/core/platform/sources/misskey/misskey_adapter.py +770 -0
  135. astrbot/core/platform/sources/misskey/misskey_api.py +964 -0
  136. astrbot/core/platform/sources/misskey/misskey_event.py +163 -0
  137. astrbot/core/platform/sources/misskey/misskey_utils.py +550 -0
  138. astrbot/core/platform/sources/qqofficial/qqofficial_message_event.py +149 -33
  139. astrbot/core/platform/sources/qqofficial/qqofficial_platform_adapter.py +41 -26
  140. astrbot/core/platform/sources/qqofficial_webhook/qo_webhook_adapter.py +36 -17
  141. astrbot/core/platform/sources/qqofficial_webhook/qo_webhook_event.py +3 -1
  142. astrbot/core/platform/sources/qqofficial_webhook/qo_webhook_server.py +14 -8
  143. astrbot/core/platform/sources/satori/satori_adapter.py +792 -0
  144. astrbot/core/platform/sources/satori/satori_event.py +432 -0
  145. astrbot/core/platform/sources/slack/client.py +164 -0
  146. astrbot/core/platform/sources/slack/slack_adapter.py +416 -0
  147. astrbot/core/platform/sources/slack/slack_event.py +253 -0
  148. astrbot/core/platform/sources/telegram/tg_adapter.py +100 -43
  149. astrbot/core/platform/sources/telegram/tg_event.py +136 -36
  150. astrbot/core/platform/sources/webchat/webchat_adapter.py +72 -22
  151. astrbot/core/platform/sources/webchat/webchat_event.py +46 -22
  152. astrbot/core/platform/sources/webchat/webchat_queue_mgr.py +35 -0
  153. astrbot/core/platform/sources/wechatpadpro/wechatpadpro_adapter.py +926 -0
  154. astrbot/core/platform/sources/wechatpadpro/wechatpadpro_message_event.py +178 -0
  155. astrbot/core/platform/sources/wechatpadpro/xml_data_parser.py +159 -0
  156. astrbot/core/platform/sources/wecom/wecom_adapter.py +169 -27
  157. astrbot/core/platform/sources/wecom/wecom_event.py +162 -77
  158. astrbot/core/platform/sources/wecom/wecom_kf.py +279 -0
  159. astrbot/core/platform/sources/wecom/wecom_kf_message.py +196 -0
  160. astrbot/core/platform/sources/wecom_ai_bot/WXBizJsonMsgCrypt.py +297 -0
  161. astrbot/core/platform/sources/wecom_ai_bot/__init__.py +15 -0
  162. astrbot/core/platform/sources/wecom_ai_bot/ierror.py +19 -0
  163. astrbot/core/platform/sources/wecom_ai_bot/wecomai_adapter.py +472 -0
  164. astrbot/core/platform/sources/wecom_ai_bot/wecomai_api.py +417 -0
  165. astrbot/core/platform/sources/wecom_ai_bot/wecomai_event.py +152 -0
  166. astrbot/core/platform/sources/wecom_ai_bot/wecomai_queue_mgr.py +153 -0
  167. astrbot/core/platform/sources/wecom_ai_bot/wecomai_server.py +168 -0
  168. astrbot/core/platform/sources/wecom_ai_bot/wecomai_utils.py +209 -0
  169. astrbot/core/platform/sources/weixin_official_account/weixin_offacc_adapter.py +306 -0
  170. astrbot/core/platform/sources/weixin_official_account/weixin_offacc_event.py +186 -0
  171. astrbot/core/platform_message_history_mgr.py +49 -0
  172. astrbot/core/provider/__init__.py +2 -3
  173. astrbot/core/provider/entites.py +8 -8
  174. astrbot/core/provider/entities.py +154 -98
  175. astrbot/core/provider/func_tool_manager.py +446 -458
  176. astrbot/core/provider/manager.py +345 -207
  177. astrbot/core/provider/provider.py +188 -73
  178. astrbot/core/provider/register.py +9 -7
  179. astrbot/core/provider/sources/anthropic_source.py +295 -115
  180. astrbot/core/provider/sources/azure_tts_source.py +224 -0
  181. astrbot/core/provider/sources/bailian_rerank_source.py +236 -0
  182. astrbot/core/provider/sources/dashscope_tts.py +138 -14
  183. astrbot/core/provider/sources/edge_tts_source.py +24 -19
  184. astrbot/core/provider/sources/fishaudio_tts_api_source.py +58 -13
  185. astrbot/core/provider/sources/gemini_embedding_source.py +61 -0
  186. astrbot/core/provider/sources/gemini_source.py +310 -132
  187. astrbot/core/provider/sources/gemini_tts_source.py +81 -0
  188. astrbot/core/provider/sources/groq_source.py +15 -0
  189. astrbot/core/provider/sources/gsv_selfhosted_source.py +151 -0
  190. astrbot/core/provider/sources/gsvi_tts_source.py +14 -7
  191. astrbot/core/provider/sources/minimax_tts_api_source.py +159 -0
  192. astrbot/core/provider/sources/openai_embedding_source.py +40 -0
  193. astrbot/core/provider/sources/openai_source.py +241 -145
  194. astrbot/core/provider/sources/openai_tts_api_source.py +18 -7
  195. astrbot/core/provider/sources/sensevoice_selfhosted_source.py +13 -11
  196. astrbot/core/provider/sources/vllm_rerank_source.py +71 -0
  197. astrbot/core/provider/sources/volcengine_tts.py +115 -0
  198. astrbot/core/provider/sources/whisper_api_source.py +18 -13
  199. astrbot/core/provider/sources/whisper_selfhosted_source.py +19 -12
  200. astrbot/core/provider/sources/xinference_rerank_source.py +116 -0
  201. astrbot/core/provider/sources/xinference_stt_provider.py +197 -0
  202. astrbot/core/provider/sources/zhipu_source.py +6 -73
  203. astrbot/core/star/__init__.py +43 -11
  204. astrbot/core/star/config.py +17 -18
  205. astrbot/core/star/context.py +362 -138
  206. astrbot/core/star/filter/__init__.py +4 -3
  207. astrbot/core/star/filter/command.py +111 -35
  208. astrbot/core/star/filter/command_group.py +46 -34
  209. astrbot/core/star/filter/custom_filter.py +6 -5
  210. astrbot/core/star/filter/event_message_type.py +4 -2
  211. astrbot/core/star/filter/permission.py +4 -2
  212. astrbot/core/star/filter/platform_adapter_type.py +45 -12
  213. astrbot/core/star/filter/regex.py +4 -2
  214. astrbot/core/star/register/__init__.py +19 -15
  215. astrbot/core/star/register/star.py +41 -13
  216. astrbot/core/star/register/star_handler.py +236 -86
  217. astrbot/core/star/session_llm_manager.py +280 -0
  218. astrbot/core/star/session_plugin_manager.py +170 -0
  219. astrbot/core/star/star.py +36 -43
  220. astrbot/core/star/star_handler.py +47 -85
  221. astrbot/core/star/star_manager.py +442 -260
  222. astrbot/core/star/star_tools.py +167 -45
  223. astrbot/core/star/updator.py +17 -20
  224. astrbot/core/umop_config_router.py +106 -0
  225. astrbot/core/updator.py +38 -13
  226. astrbot/core/utils/astrbot_path.py +39 -0
  227. astrbot/core/utils/command_parser.py +1 -1
  228. astrbot/core/utils/io.py +119 -60
  229. astrbot/core/utils/log_pipe.py +1 -1
  230. astrbot/core/utils/metrics.py +11 -10
  231. astrbot/core/utils/migra_helper.py +73 -0
  232. astrbot/core/utils/path_util.py +63 -62
  233. astrbot/core/utils/pip_installer.py +37 -15
  234. astrbot/core/utils/session_lock.py +29 -0
  235. astrbot/core/utils/session_waiter.py +19 -20
  236. astrbot/core/utils/shared_preferences.py +174 -34
  237. astrbot/core/utils/t2i/__init__.py +4 -1
  238. astrbot/core/utils/t2i/local_strategy.py +386 -238
  239. astrbot/core/utils/t2i/network_strategy.py +109 -49
  240. astrbot/core/utils/t2i/renderer.py +29 -14
  241. astrbot/core/utils/t2i/template/astrbot_powershell.html +184 -0
  242. astrbot/core/utils/t2i/template_manager.py +111 -0
  243. astrbot/core/utils/tencent_record_helper.py +115 -1
  244. astrbot/core/utils/version_comparator.py +10 -13
  245. astrbot/core/zip_updator.py +112 -65
  246. astrbot/dashboard/routes/__init__.py +20 -13
  247. astrbot/dashboard/routes/auth.py +20 -9
  248. astrbot/dashboard/routes/chat.py +297 -141
  249. astrbot/dashboard/routes/config.py +652 -55
  250. astrbot/dashboard/routes/conversation.py +107 -37
  251. astrbot/dashboard/routes/file.py +26 -0
  252. astrbot/dashboard/routes/knowledge_base.py +1244 -0
  253. astrbot/dashboard/routes/log.py +27 -2
  254. astrbot/dashboard/routes/persona.py +202 -0
  255. astrbot/dashboard/routes/plugin.py +197 -139
  256. astrbot/dashboard/routes/route.py +27 -7
  257. astrbot/dashboard/routes/session_management.py +354 -0
  258. astrbot/dashboard/routes/stat.py +85 -18
  259. astrbot/dashboard/routes/static_file.py +5 -2
  260. astrbot/dashboard/routes/t2i.py +233 -0
  261. astrbot/dashboard/routes/tools.py +184 -120
  262. astrbot/dashboard/routes/update.py +59 -36
  263. astrbot/dashboard/server.py +96 -36
  264. astrbot/dashboard/utils.py +165 -0
  265. astrbot-4.7.0.dist-info/METADATA +294 -0
  266. astrbot-4.7.0.dist-info/RECORD +274 -0
  267. {astrbot-3.5.6.dist-info → astrbot-4.7.0.dist-info}/WHEEL +1 -1
  268. astrbot/core/db/plugin/sqlite_impl.py +0 -112
  269. astrbot/core/db/sqlite_init.sql +0 -50
  270. astrbot/core/pipeline/platform_compatibility/stage.py +0 -56
  271. astrbot/core/pipeline/process_stage/method/llm_request.py +0 -606
  272. astrbot/core/platform/sources/gewechat/client.py +0 -806
  273. astrbot/core/platform/sources/gewechat/downloader.py +0 -55
  274. astrbot/core/platform/sources/gewechat/gewechat_event.py +0 -255
  275. astrbot/core/platform/sources/gewechat/gewechat_platform_adapter.py +0 -103
  276. astrbot/core/platform/sources/gewechat/xml_data_parser.py +0 -110
  277. astrbot/core/provider/sources/dashscope_source.py +0 -203
  278. astrbot/core/provider/sources/dify_source.py +0 -281
  279. astrbot/core/provider/sources/llmtuner_source.py +0 -132
  280. astrbot/core/rag/embedding/openai_source.py +0 -20
  281. astrbot/core/rag/knowledge_db_mgr.py +0 -94
  282. astrbot/core/rag/store/__init__.py +0 -9
  283. astrbot/core/rag/store/chroma_db.py +0 -42
  284. astrbot/core/utils/dify_api_client.py +0 -152
  285. astrbot-3.5.6.dist-info/METADATA +0 -249
  286. astrbot-3.5.6.dist-info/RECORD +0 -158
  287. {astrbot-3.5.6.dist-info → astrbot-4.7.0.dist-info}/entry_points.txt +0 -0
  288. {astrbot-3.5.6.dist-info → astrbot-4.7.0.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,161 @@
1
+ from collections.abc import Callable
2
+
3
+ from .base import BaseChunker
4
+
5
+
6
class RecursiveCharacterChunker(BaseChunker):
    """Split text into size-bounded chunks, trying coarse separators first.

    The separators are tried in priority order (paragraph, line, sentence,
    ...). The first one that actually divides the text is used; pieces that
    are still too large are split again recursively, and small neighbouring
    pieces are merged back together up to ``chunk_size``.
    """

    def __init__(
        self,
        chunk_size: int = 500,
        chunk_overlap: int = 100,
        length_function: Callable[[str], int] = len,
        is_separator_regex: bool = False,
        separators: list[str] | None = None,
    ):
        """Initialise the recursive character splitter.

        Args:
            chunk_size: Maximum size of each chunk, measured with
                ``length_function``.
            chunk_overlap: Amount of trailing text from one chunk that is
                repeated at the start of the next one.
            length_function: Callable used to measure the size of a string.
            is_separator_regex: Whether separators are regular expressions.
                NOTE(review): this flag is only stored; ``chunk`` currently
                treats every separator as a literal string.
            separators: Separators to try, highest priority first. A falsy
                value (``None`` or an empty list) selects the default list.

        """
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        self.length_function = length_function
        self.is_separator_regex = is_separator_regex

        # Default separators, highest priority first.
        self.separators = separators or [
            "\n\n",  # paragraph
            "\n",  # line break
            "。",  # Chinese full stop
            ",",  # Chinese comma
            ". ",  # sentence
            ", ",  # comma-separated clause
            " ",  # word
            "",  # single character (falls back to fixed-width slicing)
        ]

    async def chunk(self, text: str, **kwargs) -> list[str]:
        """Recursively split ``text`` into chunks.

        Args:
            text: The text to split.
            **kwargs: Optional per-call overrides:
                ``chunk_size`` - maximum size of each chunk;
                ``chunk_overlap`` - overlap between consecutive chunks.
                Missing keys fall back to the values given to ``__init__``.

        Returns:
            The list of chunks. Empty input yields an empty list; text that
            already fits in ``chunk_size`` is returned as a single chunk.

        """
        if not text:
            return []

        overlap = kwargs.get("chunk_overlap", self.chunk_overlap)
        chunk_size = kwargs.get("chunk_size", self.chunk_size)

        if self.length_function(text) <= chunk_size:
            return [text]

        for separator in self.separators:
            # The empty separator means "no natural boundary left":
            # fall back to fixed-width slicing.
            if separator == "":
                return self._split_by_character(text, chunk_size, overlap)

            if separator not in text:
                continue

            pieces = text.split(separator)
            # Re-attach the separator to every piece except the last one so
            # that joining the pieces reproduces the original text.
            pieces = [p + separator for p in pieces[:-1]] + [pieces[-1]]
            pieces = [p for p in pieces if p]
            if len(pieces) == 1:
                # The separator only occurred at the very end; try a finer one.
                continue

            final_chunks: list[str] = []
            pending: list[str] = []
            pending_length = 0

            for piece in pieces:
                piece_length = self.length_function(piece)

                if piece_length > chunk_size:
                    # A single piece is too large: flush what has been
                    # accumulated so far, then split the piece recursively.
                    if pending:
                        final_chunks.extend(
                            await self.chunk(
                                "".join(pending),
                                chunk_size=chunk_size,
                                chunk_overlap=overlap,
                            ),
                        )
                        pending = []
                        pending_length = 0

                    final_chunks.extend(
                        await self.chunk(
                            piece,
                            chunk_size=chunk_size,
                            chunk_overlap=overlap,
                        ),
                    )
                elif pending_length + piece_length > chunk_size:
                    # Adding this piece would overflow: emit the current
                    # chunk and start a new one seeded with the overlap tail.
                    combined_text = "".join(pending)
                    final_chunks.append(combined_text)

                    # Only carry as much overlap as still fits next to the
                    # new piece, so the next chunk never starts out larger
                    # than chunk_size.
                    carry = self._overlap_tail(
                        combined_text,
                        overlap,
                        chunk_size - piece_length,
                    )
                    if carry:
                        pending = [carry, piece]
                        pending_length = self.length_function(carry) + piece_length
                    else:
                        pending = [piece]
                        pending_length = piece_length
                else:
                    pending.append(piece)
                    pending_length += piece_length

            # Emit whatever is left over.
            if pending:
                final_chunks.append("".join(pending))

            return final_chunks

        # No separator applied (only reachable when the separator list does
        # not contain ""): return the text unsplit.
        return [text]

    @staticmethod
    def _overlap_tail(text: str, overlap: int, room: int) -> str:
        """Return the trailing part of ``text`` to repeat in the next chunk.

        Args:
            text: The chunk that was just emitted.
            overlap: Requested overlap, in characters.
            room: Space left in the next chunk after its first piece. The
                tail is sliced by character count, so this bound is exact
                only when sizes are measured with ``len``.

        Returns:
            The tail to prepend, or ``""`` when no overlap applies: the
            overlap is not positive, it would cover the whole of ``text``,
            or there is no room for it.

        """
        if overlap <= 0 or overlap >= len(text):
            return ""
        size = min(overlap, room)
        if size <= 0:
            return ""
        return text[-size:]

    def _split_by_character(
        self,
        text: str,
        chunk_size: int | None = None,
        overlap: int | None = None,
    ) -> list[str]:
        """Split ``text`` into fixed-width character windows.

        Args:
            text: The text to split.
            chunk_size: Window width; a falsy value selects
                ``self.chunk_size``.
            overlap: Characters shared by consecutive windows; ``None``
                selects ``self.chunk_overlap``. An explicit ``0`` is honoured.
                Values outside ``[0, chunk_size - 1]`` are clamped so the
                window always advances.

        Returns:
            The list of windows, in order.

        Raises:
            ValueError: If the effective chunk size is not positive.

        """
        chunk_size = chunk_size or self.chunk_size
        if overlap is None:
            overlap = self.chunk_overlap
        if chunk_size <= 0:
            raise ValueError("chunk_size must be a positive integer")

        # A step of zero makes range() raise and a negative step yields no
        # windows at all, so keep the overlap strictly below the window width.
        overlap = max(0, min(overlap, chunk_size - 1))
        step = chunk_size - overlap

        total = len(text)
        result = []
        for start in range(0, total, step):
            end = min(start + chunk_size, total)
            result.append(text[start:end])
            if end == total:
                break

        return result
@@ -0,0 +1,301 @@
1
+ from contextlib import asynccontextmanager
2
+ from pathlib import Path
3
+
4
+ from sqlalchemy import delete, func, select, text, update
5
+ from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine
6
+ from sqlmodel import col, desc
7
+
8
+ from astrbot.core import logger
9
+ from astrbot.core.db.vec_db.faiss_impl import FaissVecDB
10
+ from astrbot.core.knowledge_base.models import (
11
+ BaseKBModel,
12
+ KBDocument,
13
+ KBMedia,
14
+ KnowledgeBase,
15
+ )
16
+
17
+
18
class KBSQLiteDatabase:
    """Async SQLite access layer for knowledge bases, documents and media."""

    def __init__(self, db_path: str = "data/knowledge_base/kb.db") -> None:
        """Prepare the async engine and session factory for the database file.

        Args:
            db_path: Path of the SQLite file; defaults to
                data/knowledge_base/kb.db. The parent directory is created
                when it does not exist.

        """
        self.db_path = db_path
        self.DATABASE_URL = f"sqlite+aiosqlite:///{db_path}"
        self.inited = False

        # Make sure the containing directory exists before the engine is built.
        Path(db_path).parent.mkdir(parents=True, exist_ok=True)

        engine_options = {
            "echo": False,
            "pool_pre_ping": True,
            "pool_recycle": 3600,
        }
        self.engine = create_async_engine(self.DATABASE_URL, **engine_options)

        # Session factory bound to the engine above.
        self.async_session = async_sessionmaker(
            self.engine,
            class_=AsyncSession,
            expire_on_commit=False,
        )

    @asynccontextmanager
    async def get_db(self):
        """Yield a database session.

        Usage:
            async with kb_db.get_db() as session:
                result = await session.execute(stmt)
        """
        async with self.async_session() as db_session:
            yield db_session

    async def initialize(self) -> None:
        """Create the knowledge-base tables and apply the SQLite PRAGMAs."""
        # SQLite tuning statements, executed in this order.
        pragma_statements = (
            "PRAGMA journal_mode=WAL",
            "PRAGMA synchronous=NORMAL",
            "PRAGMA cache_size=20000",
            "PRAGMA temp_store=MEMORY",
            "PRAGMA mmap_size=134217728",
            "PRAGMA optimize",
        )

        async with self.engine.begin() as conn:
            # Create every table registered on the knowledge-base metadata.
            await conn.run_sync(BaseKBModel.metadata.create_all)

            for pragma in pragma_statements:
                await conn.execute(text(pragma))
            await conn.commit()

        self.inited = True

    async def migrate_to_v1(self) -> None:
        """Run the v1 migration.

        Creates the indexes on the knowledge-base, document and media tables
        (each one only if it does not already exist).
        """
        index_statements = (
            # knowledge_bases
            "CREATE INDEX IF NOT EXISTS idx_kb_kb_id ON knowledge_bases(kb_id)",
            "CREATE INDEX IF NOT EXISTS idx_kb_name ON knowledge_bases(kb_name)",
            "CREATE INDEX IF NOT EXISTS idx_kb_created_at "
            "ON knowledge_bases(created_at)",
            # kb_documents
            "CREATE INDEX IF NOT EXISTS idx_doc_doc_id ON kb_documents(doc_id)",
            "CREATE INDEX IF NOT EXISTS idx_doc_kb_id ON kb_documents(kb_id)",
            "CREATE INDEX IF NOT EXISTS idx_doc_name ON kb_documents(doc_name)",
            "CREATE INDEX IF NOT EXISTS idx_doc_type ON kb_documents(file_type)",
            "CREATE INDEX IF NOT EXISTS idx_doc_created_at "
            "ON kb_documents(created_at)",
            # kb_media
            "CREATE INDEX IF NOT EXISTS idx_media_media_id ON kb_media(media_id)",
            "CREATE INDEX IF NOT EXISTS idx_media_doc_id ON kb_media(doc_id)",
            "CREATE INDEX IF NOT EXISTS idx_media_kb_id ON kb_media(kb_id)",
            "CREATE INDEX IF NOT EXISTS idx_media_type ON kb_media(media_type)",
        )

        async with self.get_db() as session:
            async with session.begin():
                for ddl in index_statements:
                    await session.execute(text(ddl))

                await session.commit()

    async def close(self) -> None:
        """Dispose of the engine and log that the database was closed."""
        await self.engine.dispose()
        logger.info(f"知识库数据库已关闭: {self.db_path}")

    async def get_kb_by_id(self, kb_id: str) -> KnowledgeBase | None:
        """Fetch the knowledge base with the given ``kb_id``, if any."""
        query = select(KnowledgeBase).where(col(KnowledgeBase.kb_id) == kb_id)
        async with self.get_db() as session:
            rows = await session.execute(query)
            return rows.scalar_one_or_none()

    async def get_kb_by_name(self, kb_name: str) -> KnowledgeBase | None:
        """Fetch the knowledge base with the given ``kb_name``, if any."""
        query = select(KnowledgeBase).where(col(KnowledgeBase.kb_name) == kb_name)
        async with self.get_db() as session:
            rows = await session.execute(query)
            return rows.scalar_one_or_none()

    async def list_kbs(self, offset: int = 0, limit: int = 100) -> list[KnowledgeBase]:
        """Return one page of knowledge bases, newest first."""
        query = select(KnowledgeBase)
        query = query.offset(offset)
        query = query.limit(limit)
        query = query.order_by(desc(KnowledgeBase.created_at))
        async with self.get_db() as session:
            rows = await session.execute(query)
            return list(rows.scalars().all())

    async def count_kbs(self) -> int:
        """Return the number of knowledge bases."""
        query = select(func.count(col(KnowledgeBase.id)))
        async with self.get_db() as session:
            rows = await session.execute(query)
            return rows.scalar() or 0

    # ===== Document queries =====

    async def get_document_by_id(self, doc_id: str) -> KBDocument | None:
        """Fetch the document with the given ``doc_id``, if any."""
        query = select(KBDocument).where(col(KBDocument.doc_id) == doc_id)
        async with self.get_db() as session:
            rows = await session.execute(query)
            return rows.scalar_one_or_none()

    async def list_documents_by_kb(
        self,
        kb_id: str,
        offset: int = 0,
        limit: int = 100,
    ) -> list[KBDocument]:
        """Return one page of a knowledge base's documents, newest first."""
        query = select(KBDocument).where(col(KBDocument.kb_id) == kb_id)
        query = query.offset(offset)
        query = query.limit(limit)
        query = query.order_by(desc(KBDocument.created_at))
        async with self.get_db() as session:
            rows = await session.execute(query)
            return list(rows.scalars().all())

    async def count_documents_by_kb(self, kb_id: str) -> int:
        """Return the number of documents belonging to ``kb_id``."""
        belongs_to_kb = col(KBDocument.kb_id) == kb_id
        query = select(func.count(col(KBDocument.id))).where(belongs_to_kb)
        async with self.get_db() as session:
            rows = await session.execute(query)
            return rows.scalar() or 0

    async def get_document_with_metadata(self, doc_id: str) -> dict | None:
        """Fetch a document together with the knowledge base it belongs to.

        Returns:
            ``{"document": ..., "knowledge_base": ...}`` for the first
            matching row, or ``None`` when there is no match.
        """
        same_kb = col(KBDocument.kb_id) == col(KnowledgeBase.kb_id)
        query = (
            select(KBDocument, KnowledgeBase)
            .join(KnowledgeBase, same_kb)
            .where(col(KBDocument.doc_id) == doc_id)
        )
        async with self.get_db() as session:
            rows = await session.execute(query)
            first_row = rows.first()

            if not first_row:
                return None

            document, knowledge_base = first_row[0], first_row[1]
            return {
                "document": document,
                "knowledge_base": knowledge_base,
            }

    async def delete_document_by_id(self, doc_id: str, vec_db: FaissVecDB):
        """Delete one document row and its vectors.

        The row is removed from the document table first; afterwards the
        entries whose ``kb_doc_id`` metadata equals ``doc_id`` are removed
        from ``vec_db``.
        """
        removal = delete(KBDocument).where(col(KBDocument.doc_id) == doc_id)
        async with self.get_db() as session, session.begin():
            await session.execute(removal)
            await session.commit()

        # Remove the matching vectors from the vector database.
        await vec_db.delete_documents(metadata_filters={"kb_doc_id": doc_id})

    # ===== Media queries =====

    async def list_media_by_doc(self, doc_id: str) -> list[KBMedia]:
        """Return every media resource attached to ``doc_id``."""
        query = select(KBMedia).where(col(KBMedia.doc_id) == doc_id)
        async with self.get_db() as session:
            rows = await session.execute(query)
            return list(rows.scalars().all())

    async def get_media_by_id(self, media_id: str) -> KBMedia | None:
        """Fetch the media resource with the given ``media_id``, if any."""
        query = select(KBMedia).where(col(KBMedia.media_id) == media_id)
        async with self.get_db() as session:
            rows = await session.execute(query)
            return rows.scalar_one_or_none()

    async def update_kb_stats(self, kb_id: str, vec_db: FaissVecDB) -> None:
        """Refresh the ``doc_count`` and ``chunk_count`` of a knowledge base.

        ``chunk_count`` is taken from ``vec_db.count_documents()``;
        ``doc_count`` is computed by a scalar subquery over the document table.
        """
        chunk_total = await vec_db.count_documents()

        doc_total = (
            select(func.count(col(KBDocument.id)))
            .where(col(KBDocument.kb_id) == kb_id)
            .scalar_subquery()
        )
        stats_update = (
            update(KnowledgeBase)
            .where(col(KnowledgeBase.kb_id) == kb_id)
            .values(doc_count=doc_total, chunk_count=chunk_total)
        )

        async with self.get_db() as session, session.begin():
            await session.execute(stats_update)
            await session.commit()