jarvis-ai-assistant 0.7.8__py3-none-any.whl → 1.0.2__py3-none-any.whl

This diff compares the contents of publicly released package versions from one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (279)
  1. jarvis/__init__.py +1 -1
  2. jarvis/jarvis_agent/__init__.py +567 -222
  3. jarvis/jarvis_agent/agent_manager.py +19 -12
  4. jarvis/jarvis_agent/builtin_input_handler.py +79 -11
  5. jarvis/jarvis_agent/config_editor.py +7 -2
  6. jarvis/jarvis_agent/event_bus.py +24 -13
  7. jarvis/jarvis_agent/events.py +19 -1
  8. jarvis/jarvis_agent/file_context_handler.py +67 -64
  9. jarvis/jarvis_agent/file_methodology_manager.py +38 -24
  10. jarvis/jarvis_agent/jarvis.py +186 -114
  11. jarvis/jarvis_agent/language_extractors/__init__.py +8 -1
  12. jarvis/jarvis_agent/language_extractors/c_extractor.py +7 -4
  13. jarvis/jarvis_agent/language_extractors/cpp_extractor.py +9 -4
  14. jarvis/jarvis_agent/language_extractors/go_extractor.py +7 -4
  15. jarvis/jarvis_agent/language_extractors/java_extractor.py +27 -20
  16. jarvis/jarvis_agent/language_extractors/javascript_extractor.py +22 -17
  17. jarvis/jarvis_agent/language_extractors/python_extractor.py +7 -4
  18. jarvis/jarvis_agent/language_extractors/rust_extractor.py +7 -4
  19. jarvis/jarvis_agent/language_extractors/typescript_extractor.py +22 -17
  20. jarvis/jarvis_agent/language_support_info.py +250 -219
  21. jarvis/jarvis_agent/main.py +19 -23
  22. jarvis/jarvis_agent/memory_manager.py +9 -6
  23. jarvis/jarvis_agent/methodology_share_manager.py +21 -15
  24. jarvis/jarvis_agent/output_handler.py +4 -2
  25. jarvis/jarvis_agent/prompt_builder.py +7 -6
  26. jarvis/jarvis_agent/prompt_manager.py +113 -8
  27. jarvis/jarvis_agent/prompts.py +317 -85
  28. jarvis/jarvis_agent/protocols.py +5 -2
  29. jarvis/jarvis_agent/run_loop.py +192 -32
  30. jarvis/jarvis_agent/session_manager.py +7 -3
  31. jarvis/jarvis_agent/share_manager.py +23 -13
  32. jarvis/jarvis_agent/shell_input_handler.py +12 -8
  33. jarvis/jarvis_agent/stdio_redirect.py +25 -26
  34. jarvis/jarvis_agent/task_analyzer.py +29 -23
  35. jarvis/jarvis_agent/task_list.py +869 -0
  36. jarvis/jarvis_agent/task_manager.py +26 -23
  37. jarvis/jarvis_agent/tool_executor.py +6 -5
  38. jarvis/jarvis_agent/tool_share_manager.py +24 -14
  39. jarvis/jarvis_agent/user_interaction.py +3 -3
  40. jarvis/jarvis_agent/utils.py +9 -1
  41. jarvis/jarvis_agent/web_bridge.py +37 -17
  42. jarvis/jarvis_agent/web_output_sink.py +5 -2
  43. jarvis/jarvis_agent/web_server.py +165 -36
  44. jarvis/jarvis_c2rust/__init__.py +1 -1
  45. jarvis/jarvis_c2rust/cli.py +260 -141
  46. jarvis/jarvis_c2rust/collector.py +37 -18
  47. jarvis/jarvis_c2rust/constants.py +60 -0
  48. jarvis/jarvis_c2rust/library_replacer.py +242 -1010
  49. jarvis/jarvis_c2rust/library_replacer_checkpoint.py +133 -0
  50. jarvis/jarvis_c2rust/library_replacer_llm.py +287 -0
  51. jarvis/jarvis_c2rust/library_replacer_loader.py +191 -0
  52. jarvis/jarvis_c2rust/library_replacer_output.py +134 -0
  53. jarvis/jarvis_c2rust/library_replacer_prompts.py +124 -0
  54. jarvis/jarvis_c2rust/library_replacer_utils.py +188 -0
  55. jarvis/jarvis_c2rust/llm_module_agent.py +98 -1044
  56. jarvis/jarvis_c2rust/llm_module_agent_apply.py +170 -0
  57. jarvis/jarvis_c2rust/llm_module_agent_executor.py +288 -0
  58. jarvis/jarvis_c2rust/llm_module_agent_loader.py +170 -0
  59. jarvis/jarvis_c2rust/llm_module_agent_prompts.py +268 -0
  60. jarvis/jarvis_c2rust/llm_module_agent_types.py +57 -0
  61. jarvis/jarvis_c2rust/llm_module_agent_utils.py +150 -0
  62. jarvis/jarvis_c2rust/llm_module_agent_validator.py +119 -0
  63. jarvis/jarvis_c2rust/loaders.py +28 -10
  64. jarvis/jarvis_c2rust/models.py +5 -2
  65. jarvis/jarvis_c2rust/optimizer.py +192 -1974
  66. jarvis/jarvis_c2rust/optimizer_build_fix.py +286 -0
  67. jarvis/jarvis_c2rust/optimizer_clippy.py +766 -0
  68. jarvis/jarvis_c2rust/optimizer_config.py +49 -0
  69. jarvis/jarvis_c2rust/optimizer_docs.py +183 -0
  70. jarvis/jarvis_c2rust/optimizer_options.py +48 -0
  71. jarvis/jarvis_c2rust/optimizer_progress.py +469 -0
  72. jarvis/jarvis_c2rust/optimizer_report.py +52 -0
  73. jarvis/jarvis_c2rust/optimizer_unsafe.py +309 -0
  74. jarvis/jarvis_c2rust/optimizer_utils.py +469 -0
  75. jarvis/jarvis_c2rust/optimizer_visibility.py +185 -0
  76. jarvis/jarvis_c2rust/scanner.py +229 -166
  77. jarvis/jarvis_c2rust/transpiler.py +531 -2732
  78. jarvis/jarvis_c2rust/transpiler_agents.py +503 -0
  79. jarvis/jarvis_c2rust/transpiler_build.py +1294 -0
  80. jarvis/jarvis_c2rust/transpiler_codegen.py +204 -0
  81. jarvis/jarvis_c2rust/transpiler_compile.py +146 -0
  82. jarvis/jarvis_c2rust/transpiler_config.py +178 -0
  83. jarvis/jarvis_c2rust/transpiler_context.py +122 -0
  84. jarvis/jarvis_c2rust/transpiler_executor.py +516 -0
  85. jarvis/jarvis_c2rust/transpiler_generation.py +278 -0
  86. jarvis/jarvis_c2rust/transpiler_git.py +163 -0
  87. jarvis/jarvis_c2rust/transpiler_mod_utils.py +225 -0
  88. jarvis/jarvis_c2rust/transpiler_modules.py +336 -0
  89. jarvis/jarvis_c2rust/transpiler_planning.py +394 -0
  90. jarvis/jarvis_c2rust/transpiler_review.py +1196 -0
  91. jarvis/jarvis_c2rust/transpiler_symbols.py +176 -0
  92. jarvis/jarvis_c2rust/utils.py +269 -79
  93. jarvis/jarvis_code_agent/after_change.py +233 -0
  94. jarvis/jarvis_code_agent/build_validation_config.py +37 -30
  95. jarvis/jarvis_code_agent/builtin_rules.py +68 -0
  96. jarvis/jarvis_code_agent/code_agent.py +976 -1517
  97. jarvis/jarvis_code_agent/code_agent_build.py +227 -0
  98. jarvis/jarvis_code_agent/code_agent_diff.py +246 -0
  99. jarvis/jarvis_code_agent/code_agent_git.py +525 -0
  100. jarvis/jarvis_code_agent/code_agent_impact.py +177 -0
  101. jarvis/jarvis_code_agent/code_agent_lint.py +283 -0
  102. jarvis/jarvis_code_agent/code_agent_llm.py +159 -0
  103. jarvis/jarvis_code_agent/code_agent_postprocess.py +105 -0
  104. jarvis/jarvis_code_agent/code_agent_prompts.py +46 -0
  105. jarvis/jarvis_code_agent/code_agent_rules.py +305 -0
  106. jarvis/jarvis_code_agent/code_analyzer/__init__.py +52 -48
  107. jarvis/jarvis_code_agent/code_analyzer/base_language.py +12 -10
  108. jarvis/jarvis_code_agent/code_analyzer/build_validator/__init__.py +12 -11
  109. jarvis/jarvis_code_agent/code_analyzer/build_validator/base.py +16 -12
  110. jarvis/jarvis_code_agent/code_analyzer/build_validator/cmake.py +26 -17
  111. jarvis/jarvis_code_agent/code_analyzer/build_validator/detector.py +558 -104
  112. jarvis/jarvis_code_agent/code_analyzer/build_validator/fallback.py +27 -16
  113. jarvis/jarvis_code_agent/code_analyzer/build_validator/go.py +22 -18
  114. jarvis/jarvis_code_agent/code_analyzer/build_validator/java_gradle.py +21 -16
  115. jarvis/jarvis_code_agent/code_analyzer/build_validator/java_maven.py +20 -16
  116. jarvis/jarvis_code_agent/code_analyzer/build_validator/makefile.py +27 -16
  117. jarvis/jarvis_code_agent/code_analyzer/build_validator/nodejs.py +47 -23
  118. jarvis/jarvis_code_agent/code_analyzer/build_validator/python.py +71 -37
  119. jarvis/jarvis_code_agent/code_analyzer/build_validator/rust.py +162 -35
  120. jarvis/jarvis_code_agent/code_analyzer/build_validator/validator.py +111 -57
  121. jarvis/jarvis_code_agent/code_analyzer/build_validator.py +18 -12
  122. jarvis/jarvis_code_agent/code_analyzer/context_manager.py +185 -183
  123. jarvis/jarvis_code_agent/code_analyzer/context_recommender.py +2 -1
  124. jarvis/jarvis_code_agent/code_analyzer/dependency_analyzer.py +24 -15
  125. jarvis/jarvis_code_agent/code_analyzer/file_ignore.py +227 -141
  126. jarvis/jarvis_code_agent/code_analyzer/impact_analyzer.py +321 -247
  127. jarvis/jarvis_code_agent/code_analyzer/language_registry.py +37 -29
  128. jarvis/jarvis_code_agent/code_analyzer/language_support.py +21 -13
  129. jarvis/jarvis_code_agent/code_analyzer/languages/__init__.py +15 -9
  130. jarvis/jarvis_code_agent/code_analyzer/languages/c_cpp_language.py +75 -45
  131. jarvis/jarvis_code_agent/code_analyzer/languages/go_language.py +87 -52
  132. jarvis/jarvis_code_agent/code_analyzer/languages/java_language.py +84 -51
  133. jarvis/jarvis_code_agent/code_analyzer/languages/javascript_language.py +94 -64
  134. jarvis/jarvis_code_agent/code_analyzer/languages/python_language.py +109 -71
  135. jarvis/jarvis_code_agent/code_analyzer/languages/rust_language.py +97 -63
  136. jarvis/jarvis_code_agent/code_analyzer/languages/typescript_language.py +103 -69
  137. jarvis/jarvis_code_agent/code_analyzer/llm_context_recommender.py +271 -268
  138. jarvis/jarvis_code_agent/code_analyzer/symbol_extractor.py +76 -64
  139. jarvis/jarvis_code_agent/code_analyzer/tree_sitter_extractor.py +92 -19
  140. jarvis/jarvis_code_agent/diff_visualizer.py +998 -0
  141. jarvis/jarvis_code_agent/lint.py +223 -524
  142. jarvis/jarvis_code_agent/rule_share_manager.py +158 -0
  143. jarvis/jarvis_code_agent/rules/clean_code.md +144 -0
  144. jarvis/jarvis_code_agent/rules/code_review.md +115 -0
  145. jarvis/jarvis_code_agent/rules/documentation.md +165 -0
  146. jarvis/jarvis_code_agent/rules/generate_rules.md +52 -0
  147. jarvis/jarvis_code_agent/rules/performance.md +158 -0
  148. jarvis/jarvis_code_agent/rules/refactoring.md +139 -0
  149. jarvis/jarvis_code_agent/rules/security.md +160 -0
  150. jarvis/jarvis_code_agent/rules/tdd.md +78 -0
  151. jarvis/jarvis_code_agent/test_rules/cpp_test.md +118 -0
  152. jarvis/jarvis_code_agent/test_rules/go_test.md +98 -0
  153. jarvis/jarvis_code_agent/test_rules/java_test.md +99 -0
  154. jarvis/jarvis_code_agent/test_rules/javascript_test.md +113 -0
  155. jarvis/jarvis_code_agent/test_rules/php_test.md +117 -0
  156. jarvis/jarvis_code_agent/test_rules/python_test.md +91 -0
  157. jarvis/jarvis_code_agent/test_rules/ruby_test.md +102 -0
  158. jarvis/jarvis_code_agent/test_rules/rust_test.md +86 -0
  159. jarvis/jarvis_code_agent/utils.py +36 -26
  160. jarvis/jarvis_code_analysis/checklists/loader.py +21 -21
  161. jarvis/jarvis_code_analysis/code_review.py +64 -33
  162. jarvis/jarvis_data/config_schema.json +285 -192
  163. jarvis/jarvis_git_squash/main.py +8 -6
  164. jarvis/jarvis_git_utils/git_commiter.py +53 -76
  165. jarvis/jarvis_mcp/__init__.py +5 -2
  166. jarvis/jarvis_mcp/sse_mcp_client.py +40 -30
  167. jarvis/jarvis_mcp/stdio_mcp_client.py +27 -19
  168. jarvis/jarvis_mcp/streamable_mcp_client.py +35 -26
  169. jarvis/jarvis_memory_organizer/memory_organizer.py +78 -55
  170. jarvis/jarvis_methodology/main.py +48 -39
  171. jarvis/jarvis_multi_agent/__init__.py +56 -23
  172. jarvis/jarvis_multi_agent/main.py +15 -18
  173. jarvis/jarvis_platform/base.py +179 -111
  174. jarvis/jarvis_platform/human.py +27 -16
  175. jarvis/jarvis_platform/kimi.py +52 -45
  176. jarvis/jarvis_platform/openai.py +101 -40
  177. jarvis/jarvis_platform/registry.py +51 -33
  178. jarvis/jarvis_platform/tongyi.py +68 -38
  179. jarvis/jarvis_platform/yuanbao.py +59 -43
  180. jarvis/jarvis_platform_manager/main.py +68 -76
  181. jarvis/jarvis_platform_manager/service.py +24 -14
  182. jarvis/jarvis_rag/README_CONFIG.md +314 -0
  183. jarvis/jarvis_rag/README_DYNAMIC_LOADING.md +311 -0
  184. jarvis/jarvis_rag/README_ONLINE_MODELS.md +230 -0
  185. jarvis/jarvis_rag/__init__.py +57 -4
  186. jarvis/jarvis_rag/cache.py +3 -1
  187. jarvis/jarvis_rag/cli.py +48 -68
  188. jarvis/jarvis_rag/embedding_interface.py +39 -0
  189. jarvis/jarvis_rag/embedding_manager.py +7 -230
  190. jarvis/jarvis_rag/embeddings/__init__.py +41 -0
  191. jarvis/jarvis_rag/embeddings/base.py +114 -0
  192. jarvis/jarvis_rag/embeddings/cohere.py +66 -0
  193. jarvis/jarvis_rag/embeddings/edgefn.py +117 -0
  194. jarvis/jarvis_rag/embeddings/local.py +260 -0
  195. jarvis/jarvis_rag/embeddings/openai.py +62 -0
  196. jarvis/jarvis_rag/embeddings/registry.py +293 -0
  197. jarvis/jarvis_rag/llm_interface.py +8 -6
  198. jarvis/jarvis_rag/query_rewriter.py +8 -9
  199. jarvis/jarvis_rag/rag_pipeline.py +61 -52
  200. jarvis/jarvis_rag/reranker.py +7 -75
  201. jarvis/jarvis_rag/reranker_interface.py +32 -0
  202. jarvis/jarvis_rag/rerankers/__init__.py +41 -0
  203. jarvis/jarvis_rag/rerankers/base.py +109 -0
  204. jarvis/jarvis_rag/rerankers/cohere.py +67 -0
  205. jarvis/jarvis_rag/rerankers/edgefn.py +140 -0
  206. jarvis/jarvis_rag/rerankers/jina.py +79 -0
  207. jarvis/jarvis_rag/rerankers/local.py +89 -0
  208. jarvis/jarvis_rag/rerankers/registry.py +293 -0
  209. jarvis/jarvis_rag/retriever.py +58 -43
  210. jarvis/jarvis_sec/__init__.py +66 -141
  211. jarvis/jarvis_sec/agents.py +21 -17
  212. jarvis/jarvis_sec/analysis.py +80 -33
  213. jarvis/jarvis_sec/checkers/__init__.py +7 -13
  214. jarvis/jarvis_sec/checkers/c_checker.py +356 -164
  215. jarvis/jarvis_sec/checkers/rust_checker.py +47 -29
  216. jarvis/jarvis_sec/cli.py +43 -21
  217. jarvis/jarvis_sec/clustering.py +430 -272
  218. jarvis/jarvis_sec/file_manager.py +99 -55
  219. jarvis/jarvis_sec/parsers.py +9 -6
  220. jarvis/jarvis_sec/prompts.py +4 -3
  221. jarvis/jarvis_sec/report.py +44 -22
  222. jarvis/jarvis_sec/review.py +180 -107
  223. jarvis/jarvis_sec/status.py +50 -41
  224. jarvis/jarvis_sec/types.py +3 -0
  225. jarvis/jarvis_sec/utils.py +160 -83
  226. jarvis/jarvis_sec/verification.py +411 -181
  227. jarvis/jarvis_sec/workflow.py +132 -21
  228. jarvis/jarvis_smart_shell/main.py +28 -41
  229. jarvis/jarvis_stats/cli.py +14 -12
  230. jarvis/jarvis_stats/stats.py +28 -19
  231. jarvis/jarvis_stats/storage.py +14 -8
  232. jarvis/jarvis_stats/visualizer.py +12 -7
  233. jarvis/jarvis_tools/base.py +5 -2
  234. jarvis/jarvis_tools/clear_memory.py +13 -9
  235. jarvis/jarvis_tools/cli/main.py +23 -18
  236. jarvis/jarvis_tools/edit_file.py +572 -873
  237. jarvis/jarvis_tools/execute_script.py +10 -7
  238. jarvis/jarvis_tools/file_analyzer.py +7 -8
  239. jarvis/jarvis_tools/meta_agent.py +287 -0
  240. jarvis/jarvis_tools/methodology.py +5 -3
  241. jarvis/jarvis_tools/read_code.py +305 -1438
  242. jarvis/jarvis_tools/read_symbols.py +50 -17
  243. jarvis/jarvis_tools/read_webpage.py +19 -18
  244. jarvis/jarvis_tools/registry.py +435 -156
  245. jarvis/jarvis_tools/retrieve_memory.py +16 -11
  246. jarvis/jarvis_tools/save_memory.py +8 -6
  247. jarvis/jarvis_tools/search_web.py +31 -31
  248. jarvis/jarvis_tools/sub_agent.py +32 -28
  249. jarvis/jarvis_tools/sub_code_agent.py +44 -60
  250. jarvis/jarvis_tools/task_list_manager.py +1811 -0
  251. jarvis/jarvis_tools/virtual_tty.py +29 -19
  252. jarvis/jarvis_utils/__init__.py +4 -0
  253. jarvis/jarvis_utils/builtin_replace_map.py +2 -1
  254. jarvis/jarvis_utils/clipboard.py +9 -8
  255. jarvis/jarvis_utils/collections.py +331 -0
  256. jarvis/jarvis_utils/config.py +699 -194
  257. jarvis/jarvis_utils/dialogue_recorder.py +294 -0
  258. jarvis/jarvis_utils/embedding.py +6 -3
  259. jarvis/jarvis_utils/file_processors.py +7 -1
  260. jarvis/jarvis_utils/fzf.py +9 -3
  261. jarvis/jarvis_utils/git_utils.py +71 -42
  262. jarvis/jarvis_utils/globals.py +116 -32
  263. jarvis/jarvis_utils/http.py +6 -2
  264. jarvis/jarvis_utils/input.py +318 -83
  265. jarvis/jarvis_utils/jsonnet_compat.py +119 -104
  266. jarvis/jarvis_utils/methodology.py +37 -28
  267. jarvis/jarvis_utils/output.py +201 -44
  268. jarvis/jarvis_utils/utils.py +986 -628
  269. {jarvis_ai_assistant-0.7.8.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/METADATA +49 -33
  270. jarvis_ai_assistant-1.0.2.dist-info/RECORD +304 -0
  271. jarvis/jarvis_code_agent/code_analyzer/structured_code.py +0 -556
  272. jarvis/jarvis_tools/generate_new_tool.py +0 -205
  273. jarvis/jarvis_tools/lsp_client.py +0 -1552
  274. jarvis/jarvis_tools/rewrite_file.py +0 -105
  275. jarvis_ai_assistant-0.7.8.dist-info/RECORD +0 -218
  276. {jarvis_ai_assistant-0.7.8.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/WHEEL +0 -0
  277. {jarvis_ai_assistant-0.7.8.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/entry_points.txt +0 -0
  278. {jarvis_ai_assistant-0.7.8.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/licenses/LICENSE +0 -0
  279. {jarvis_ai_assistant-0.7.8.dist-info → jarvis_ai_assistant-1.0.2.dist-info}/top_level.txt +0 -0
jarvis/jarvis_rag/README_ONLINE_MODELS.md ADDED
@@ -0,0 +1,230 @@
1
+ # 在线模型使用指南
2
+
3
+ 本文档介绍如何使用在线嵌入模型和重排模型。
4
+
5
+ ## 概述
6
+
7
+ Jarvis RAG 框架现在支持通过抽象接口使用在线模型(API)和本地模型。这允许您:
8
+
9
+ - 使用本地 HuggingFace 模型(默认)
10
+ - 使用在线 API(如 OpenAI、Cohere、Jina 等)
11
+ - 轻松切换或混合使用不同的模型
12
+
13
+ ## 接口抽象
14
+
15
+ ### EmbeddingInterface
16
+
17
+ 所有嵌入模型都实现 `EmbeddingInterface` 接口,提供以下方法:
18
+
19
+ - `embed_documents(texts: List[str]) -> List[List[float]]`: 为文档列表计算嵌入
20
+ - `embed_query(text: str) -> List[float]`: 为查询计算嵌入
21
+
22
+ ### RerankerInterface
23
+
24
+ 所有重排模型都实现 `RerankerInterface` 接口,提供以下方法:
25
+
26
+ - `rerank(query: str, documents: List[Document], top_n: int = 5) -> List[Document]`: 对文档进行重排
27
+
28
+ ## 使用示例
29
+
30
+ ### 使用本地模型(默认)
31
+
32
+ ```python
33
+ from jarvis.jarvis_rag import JarvisRAGPipeline
34
+
35
+ # 使用默认的本地模型
36
+ pipeline = JarvisRAGPipeline()
37
+ ```
38
+
39
+ ### 使用在线嵌入模型
40
+
41
+ #### OpenAI 嵌入模型
42
+
43
+ ```python
44
+ from jarvis.jarvis_rag import JarvisRAGPipeline
45
+ from jarvis.jarvis_rag.embeddings import OpenAIEmbeddingModel
46
+
47
+ # 创建在线嵌入模型
48
+ embedding_model = OpenAIEmbeddingModel(
49
+ api_key="your-api-key", # 或设置 OPENAI_API_KEY 环境变量
50
+ model_name="text-embedding-3-small"
51
+ )
52
+
53
+ # 注意:目前 JarvisRAGPipeline 的构造函数还不直接支持传入嵌入模型实例
54
+ # 您需要修改代码或等待后续更新
55
+ ```
56
+
57
+ #### Cohere 嵌入模型
58
+
59
+ ```python
60
+ from jarvis.jarvis_rag.embeddings import CohereEmbeddingModel
61
+
62
+ embedding_model = CohereEmbeddingModel(
63
+ api_key="your-api-key", # 或设置 COHERE_API_KEY 环境变量
64
+ model_name="embed-english-v3.0"
65
+ )
66
+ ```
67
+
68
+ #### EdgeFn 嵌入模型
69
+
70
+ ```python
71
+ from jarvis.jarvis_rag.embeddings import EdgeFnEmbeddingModel
72
+
73
+ embedding_model = EdgeFnEmbeddingModel(
74
+ api_key="your-api-key", # 或设置 EDGEFN_API_KEY 环境变量
75
+ model_name="BAAI/bge-m3" # 或其他支持的模型
76
+ )
77
+ ```
78
+
79
+ ### 完整示例:使用 EdgeFn 嵌入和重排模型
80
+
81
+ ```python
82
+ from jarvis.jarvis_rag.embeddings import EdgeFnEmbeddingModel
83
+ from jarvis.jarvis_rag.rerankers import EdgeFnReranker
84
+ from langchain.docstore.document import Document
85
+
86
+ # 初始化 EdgeFn 嵌入模型
87
+ embedding_model = EdgeFnEmbeddingModel(
88
+ api_key="your-api-key", # 或设置 EDGEFN_API_KEY 环境变量
89
+ model_name="BAAI/bge-m3"
90
+ )
91
+
92
+ # 初始化 EdgeFn 重排模型
93
+ reranker = EdgeFnReranker(
94
+ api_key="your-api-key", # 或设置 EDGEFN_API_KEY 环境变量
95
+ model_name="bge-reranker-v2-m3"
96
+ )
97
+
98
+ # 使用嵌入模型
99
+ documents = ["文档1内容", "文档2内容", "文档3内容"]
100
+ embeddings = embedding_model.embed_documents(documents)
101
+ query_embedding = embedding_model.embed_query("查询文本")
102
+
103
+ # 使用重排模型
104
+ doc_list = [
105
+ Document(page_content="apple", metadata={"source": "doc1"}),
106
+ Document(page_content="banana", metadata={"source": "doc2"}),
107
+ Document(page_content="fruit", metadata={"source": "doc3"}),
108
+ Document(page_content="vegetable", metadata={"source": "doc4"}),
109
+ ]
110
+ reranked_docs = reranker.rerank(query="Apple", documents=doc_list, top_n=3)
111
+ ```
112
+
113
+ ### 使用在线重排模型
114
+
115
+ #### Cohere 重排模型
116
+
117
+ ```python
118
+ from jarvis.jarvis_rag.rerankers import CohereReranker
119
+
120
+ reranker = CohereReranker(
121
+ api_key="your-api-key", # 或设置 COHERE_API_KEY 环境变量
122
+ model_name="rerank-english-v3.0"
123
+ )
124
+ ```
125
+
126
+ #### Jina 重排模型
127
+
128
+ ```python
129
+ from jarvis.jarvis_rag.rerankers import JinaReranker
130
+
131
+ reranker = JinaReranker(
132
+ api_key="your-api-key", # 或设置 JINA_API_KEY 环境变量
133
+ model_name="jina-reranker-v1-base-en"
134
+ )
135
+ ```
136
+
137
+ #### EdgeFn 重排模型
138
+
139
+ ```python
140
+ from jarvis.jarvis_rag.rerankers import EdgeFnReranker
141
+
142
+ reranker = EdgeFnReranker(
143
+ api_key="your-api-key", # 或设置 EDGEFN_API_KEY 环境变量
144
+ model_name="bge-reranker-v2-m3" # 或其他支持的模型
145
+ )
146
+ ```
147
+
148
+ ## 实现自定义在线模型
149
+
150
+ ### 实现自定义嵌入模型
151
+
152
+ ```python
153
+ from jarvis.jarvis_rag.embedding_interface import EmbeddingInterface
154
+ from typing import List
155
+
156
+ class MyCustomEmbeddingModel(EmbeddingInterface):
157
+ def __init__(self, api_key: str):
158
+ self.api_key = api_key
159
+ # 初始化您的API客户端
160
+
161
+ def embed_documents(self, texts: List[str]) -> List[List[float]]:
162
+ # 实现您的API调用逻辑
163
+ embeddings = []
164
+ for text in texts:
165
+ # 调用您的API
166
+ embedding = self._call_your_api(text)
167
+ embeddings.append(embedding)
168
+ return embeddings
169
+
170
+ def embed_query(self, text: str) -> List[float]:
171
+ # 实现查询嵌入逻辑
172
+ return self._call_your_api(text)
173
+ ```
174
+
175
+ ### 实现自定义重排模型
176
+
177
+ ```python
178
+ from jarvis.jarvis_rag.reranker_interface import RerankerInterface
179
+ from langchain.docstore.document import Document
180
+ from typing import List
181
+
182
+ class MyCustomReranker(RerankerInterface):
183
+ def __init__(self, api_key: str):
184
+ self.api_key = api_key
185
+ # 初始化您的API客户端
186
+
187
+ def rerank(
188
+ self, query: str, documents: List[Document], top_n: int = 5
189
+ ) -> List[Document]:
190
+ # 实现您的重排逻辑
191
+ doc_texts = [doc.page_content for doc in documents]
192
+ scores = self._call_your_api(query, doc_texts)
193
+
194
+ # 根据分数排序
195
+ doc_with_scores = list(zip(documents, scores))
196
+ doc_with_scores.sort(key=lambda x: x[1], reverse=True)
197
+
198
+ return [doc for doc, _ in doc_with_scores[:top_n]]
199
+ ```
200
+
201
+ ## 向后兼容性
202
+
203
+ 为了保持向后兼容性,原有的类名仍然可用:
204
+
205
+ - `EmbeddingManager` 是 `LocalEmbeddingModel` 的别名
206
+ - `Reranker` 是 `LocalReranker` 的别名
207
+
208
+ 现有代码无需修改即可继续工作。
209
+
210
+ ## 注意事项
211
+
212
+ 1. **API 密钥安全**: 建议使用环境变量存储 API 密钥,而不是硬编码在代码中。
213
+
214
+ 2. **成本考虑**: 在线模型通常按使用量收费,请根据您的需求选择合适的模型。
215
+
216
+ 3. **性能**: 在线模型需要网络请求,可能比本地模型慢,但通常提供更好的效果。
217
+
218
+ 4. **依赖安装**: 使用特定的在线模型需要安装相应的 Python 包:
219
+ - OpenAI: `pip install openai`
220
+ - Cohere: `pip install cohere`
221
+ - EdgeFn (嵌入和重排): `pip install requests` (已包含在大多数环境中)
222
+ - Jina: `pip install requests` (已包含在大多数环境中)
223
+
224
+ ## 未来扩展
225
+
226
+ 框架设计允许轻松添加新的在线模型提供商。如果您需要支持其他 API,可以:
227
+
228
+ 1. 继承 `OnlineEmbeddingModel` 或 `OnlineReranker` 基类
229
+ 2. 实现 `_call_api` 方法
230
+ 3. 在 `__init__.py` 中导出新类
jarvis/jarvis_rag/__init__.py CHANGED
@@ -1,11 +1,64 @@
1
1
  """
2
2
  Jarvis RAG 框架
3
3
 
4
- 一个灵活的RAG管道,具有可插拔的远程LLM和本地带缓存的嵌入模型。
4
+ 一个灵活的RAG管道,具有可插拔的远程LLM和本地/在线嵌入模型和重排模型。
5
5
  """
6
6
 
7
- from .rag_pipeline import JarvisRAGPipeline
7
+ from .embedding_interface import EmbeddingInterface
8
+
9
+ # 从新的目录结构导入
10
+ from .embeddings import EmbeddingManager # 向后兼容别名
11
+ from .embeddings import EmbeddingRegistry
12
+ from .embeddings import LocalEmbeddingModel
8
13
  from .llm_interface import LLMInterface
9
- from .embedding_manager import EmbeddingManager
14
+ from .rag_pipeline import JarvisRAGPipeline
15
+ from .reranker_interface import RerankerInterface
16
+ from .rerankers import LocalReranker
17
+ from .rerankers import Reranker # 向后兼容别名
18
+ from .rerankers import RerankerRegistry
19
+
20
+ # 在线模型实现(可选导入)
21
+ try:
22
+ from .embeddings import CohereEmbeddingModel
23
+ from .embeddings import EdgeFnEmbeddingModel
24
+ from .embeddings import OnlineEmbeddingModel
25
+ from .embeddings import OpenAIEmbeddingModel
26
+ from .rerankers import CohereReranker
27
+ from .rerankers import EdgeFnReranker
28
+ from .rerankers import JinaReranker
29
+ from .rerankers import OnlineReranker
10
30
 
11
- __all__ = ["JarvisRAGPipeline", "LLMInterface", "EmbeddingManager"]
31
+ __all__ = [
32
+ "JarvisRAGPipeline",
33
+ "LLMInterface",
34
+ "EmbeddingInterface",
35
+ "EmbeddingManager", # 向后兼容别名
36
+ "LocalEmbeddingModel",
37
+ "EmbeddingRegistry",
38
+ "RerankerInterface",
39
+ "Reranker", # 向后兼容别名
40
+ "LocalReranker",
41
+ "RerankerRegistry",
42
+ "OnlineEmbeddingModel",
43
+ "OpenAIEmbeddingModel",
44
+ "CohereEmbeddingModel",
45
+ "EdgeFnEmbeddingModel",
46
+ "OnlineReranker",
47
+ "CohereReranker",
48
+ "JinaReranker",
49
+ "EdgeFnReranker",
50
+ ]
51
+ except ImportError:
52
+ # 如果在线模型依赖未安装,只导出基础接口
53
+ __all__ = [
54
+ "JarvisRAGPipeline",
55
+ "LLMInterface",
56
+ "EmbeddingInterface",
57
+ "EmbeddingManager", # 向后兼容别名
58
+ "LocalEmbeddingModel",
59
+ "EmbeddingRegistry",
60
+ "RerankerInterface",
61
+ "Reranker", # 向后兼容别名
62
+ "LocalReranker",
63
+ "RerankerRegistry",
64
+ ]
jarvis/jarvis_rag/cache.py CHANGED
@@ -1,5 +1,7 @@
1
1
  import hashlib
2
- from typing import List, Optional, Any
2
+ from typing import Any
3
+ from typing import List
4
+ from typing import Optional
3
5
 
4
6
  from diskcache import Cache
5
7
 
jarvis/jarvis_rag/cli.py CHANGED
@@ -1,25 +1,26 @@
1
+ import mimetypes
1
2
  import os
2
3
  import sys
3
4
  from pathlib import Path
4
- from typing import Optional, List, Tuple
5
- import mimetypes
5
+ from typing import List
6
+ from typing import Optional
7
+ from typing import Tuple
6
8
 
7
- import pathspec # type: ignore
9
+ import pathspec
8
10
  import typer
9
11
  from langchain.docstore.document import Document
10
- from langchain_community.document_loaders import (
11
- TextLoader,
12
- UnstructuredMarkdownLoader,
13
- )
12
+ from langchain_community.document_loaders import TextLoader
13
+ from langchain_community.document_loaders import UnstructuredMarkdownLoader
14
14
  from langchain_core.document_loaders.base import BaseLoader
15
15
  from rich.markdown import Markdown
16
16
 
17
- from jarvis.jarvis_utils.utils import init_env, is_rag_installed, get_missing_rag_modules
18
- from jarvis.jarvis_utils.config import (
19
- get_rag_embedding_model,
20
- get_rag_use_bm25,
21
- get_rag_use_rerank,
22
- )
17
+ from jarvis.jarvis_utils.config import get_rag_embedding_model
18
+ from jarvis.jarvis_utils.config import get_rag_use_bm25
19
+ from jarvis.jarvis_utils.config import get_rag_use_rerank
20
+ from jarvis.jarvis_utils.output import PrettyOutput
21
+ from jarvis.jarvis_utils.utils import get_missing_rag_modules
22
+ from jarvis.jarvis_utils.utils import init_env
23
+ from jarvis.jarvis_utils.utils import is_rag_installed
23
24
 
24
25
 
25
26
  def is_likely_text_file(file_path: Path) -> bool:
@@ -70,7 +71,7 @@ class _CustomPlatformLLM(LLMInterface):
70
71
 
71
72
  def __init__(self, platform: BasePlatform):
72
73
  self.platform = platform
73
- print(
74
+ PrettyOutput.auto_print(
74
75
  f"ℹ️ 使用自定义LLM: 平台='{platform.platform_name()}', 模型='{platform.name()}'"
75
76
  )
76
77
 
@@ -86,15 +87,13 @@ def _create_custom_llm(platform_name: str, model_name: str) -> Optional[LLMInter
86
87
  registry = PlatformRegistry.get_global_platform_registry()
87
88
  platform_instance = registry.create_platform(platform_name)
88
89
  if not platform_instance:
89
- print(
90
- f"❌ 错误: 平台 '{platform_name}' 未找到。"
91
- )
90
+ PrettyOutput.auto_print(f"❌ 错误: 平台 '{platform_name}' 未找到。")
92
91
  return None
93
92
  platform_instance.set_model_name(model_name)
94
93
  platform_instance.set_suppress_output(True)
95
94
  return _CustomPlatformLLM(platform_instance)
96
95
  except Exception as e:
97
- print(f"❌ 创建自定义LLM时出错: {e}")
96
+ PrettyOutput.auto_print(f"❌ 创建自定义LLM时出错: {e}")
98
97
  return None
99
98
 
100
99
 
@@ -118,14 +117,10 @@ def _load_ragignore_spec() -> Tuple[Optional[pathspec.PathSpec], Optional[Path]]
118
117
  with open(ignore_file_to_use, "r", encoding="utf-8") as f:
119
118
  patterns = f.read().splitlines()
120
119
  spec = pathspec.PathSpec.from_lines("gitwildmatch", patterns)
121
- print(
122
- f"✅ 加载忽略规则: {ignore_file_to_use}"
123
- )
120
+ PrettyOutput.auto_print(f"✅ 加载忽略规则: {ignore_file_to_use}")
124
121
  return spec, project_root_path
125
122
  except Exception as e:
126
- print(
127
- f"⚠️ 加载 {ignore_file_to_use.name} 文件失败: {e}"
128
- )
123
+ PrettyOutput.auto_print(f"⚠️ 加载 {ignore_file_to_use.name} 文件失败: {e}")
129
124
 
130
125
  return None, None
131
126
 
@@ -176,7 +171,7 @@ def add_documents(
176
171
  continue
177
172
 
178
173
  if path.is_dir():
179
- print(f"ℹ️ 正在扫描目录: {path}")
174
+ PrettyOutput.auto_print(f"ℹ️ 正在扫描目录: {path}")
180
175
  for item in path.rglob("*"):
181
176
  if item.is_file() and is_likely_text_file(item):
182
177
  files_to_process.add(item)
@@ -184,12 +179,10 @@ def add_documents(
184
179
  if is_likely_text_file(path):
185
180
  files_to_process.add(path)
186
181
  else:
187
- print(
188
- f"⚠️ 跳过可能的二进制文件: {path}"
189
- )
182
+ PrettyOutput.auto_print(f"⚠️ 跳过可能的二进制文件: {path}")
190
183
 
191
184
  if not files_to_process:
192
- print("⚠️ 在指定路径中未找到任何文本文件。")
185
+ PrettyOutput.auto_print("⚠️ 在指定路径中未找到任何文本文件。")
193
186
  return
194
187
 
195
188
  # 使用 .ragignore 过滤文件
@@ -210,20 +203,16 @@ def add_documents(
210
203
 
211
204
  ignored_count = initial_count - len(retained_files)
212
205
  if ignored_count > 0:
213
- print(
206
+ PrettyOutput.auto_print(
214
207
  f"ℹ️ 根据 .ragignore 规则过滤掉 {ignored_count} 个文件。"
215
208
  )
216
209
  files_to_process = retained_files
217
210
 
218
211
  if not files_to_process:
219
- print(
220
- "⚠️ 所有找到的文本文件都被忽略规则过滤掉了。"
221
- )
212
+ PrettyOutput.auto_print("⚠️ 所有找到的文本文件都被忽略规则过滤掉了。")
222
213
  return
223
214
 
224
- print(
225
- f"ℹ️ 发现 {len(files_to_process)} 个独立文件待处理。"
226
- )
215
+ PrettyOutput.auto_print(f"ℹ️ 发现 {len(files_to_process)} 个独立文件待处理。")
227
216
 
228
217
  try:
229
218
  pipeline = JarvisRAGPipeline(
@@ -250,38 +239,36 @@ def add_documents(
250
239
  docs_batch.extend(loader.load())
251
240
  loaded_msgs.append(f"已加载: {file_path} (文件 {i + 1}/{total_files})")
252
241
  except Exception as e:
253
- print(f"⚠️ 加载失败 {file_path}: {e}")
242
+ PrettyOutput.auto_print(f"⚠️ 加载失败 {file_path}: {e}")
254
243
 
255
244
  # 当批处理已满或是最后一个文件时处理批处理
256
245
  if docs_batch and (len(docs_batch) >= batch_size or (i + 1) == total_files):
257
246
  if loaded_msgs:
258
- joined_msgs = '\n'.join(loaded_msgs)
259
- print(f"ℹ️ {joined_msgs}")
247
+ joined_msgs = "\n".join(loaded_msgs)
248
+ PrettyOutput.auto_print(f"ℹ️ {joined_msgs}")
260
249
  loaded_msgs = []
261
- print(
250
+ PrettyOutput.auto_print(
262
251
  f"ℹ️ 正在处理批次,包含 {len(docs_batch)} 个文档..."
263
252
  )
264
253
  pipeline.add_documents(docs_batch)
265
254
  total_docs_added += len(docs_batch)
266
- print(
267
- f"✅ 成功添加 {len(docs_batch)} 个文档。"
268
- )
255
+ PrettyOutput.auto_print(f"✅ 成功添加 {len(docs_batch)} 个文档。")
269
256
  docs_batch = [] # 清空批处理
270
257
 
271
258
  # 最后统一打印可能残留的"已加载"信息
272
259
  if loaded_msgs:
273
- print(f"ℹ️ {chr(10).join(loaded_msgs)}")
260
+ PrettyOutput.auto_print(f"ℹ️ {chr(10).join(loaded_msgs)}")
274
261
  loaded_msgs = []
275
262
  if total_docs_added == 0:
276
- print("❌ 未能成功加载任何文档。")
263
+ PrettyOutput.auto_print("❌ 未能成功加载任何文档。")
277
264
  raise typer.Exit(code=1)
278
265
 
279
- print(
266
+ PrettyOutput.auto_print(
280
267
  f"✅ 成功将 {total_docs_added} 个文档的内容添加至集合 '{collection_name}'。"
281
268
  )
282
269
 
283
270
  except Exception as e:
284
- print(f"❌ 发生严重错误: {e}")
271
+ PrettyOutput.auto_print(f"❌ 发生严重错误: {e}")
285
272
  raise typer.Exit(code=1)
286
273
 
287
274
 
@@ -308,7 +295,7 @@ def list_documents(
308
295
  results = collection.get() # 获取集合中的所有项目
309
296
 
310
297
  if not results or not results["metadatas"]:
311
- print("ℹ️ 知识库中没有找到任何文档。")
298
+ PrettyOutput.auto_print("ℹ️ 知识库中没有找到任何文档。")
312
299
  return
313
300
 
314
301
  # 从元数据中提取唯一的源文件路径
@@ -320,20 +307,18 @@ def list_documents(
320
307
  sources.add(source)
321
308
 
322
309
  if not sources:
323
- print(
324
- "ℹ️ 知识库中没有找到任何带有源信息的文档。"
325
- )
310
+ PrettyOutput.auto_print("ℹ️ 知识库中没有找到任何带有源信息的文档。")
326
311
  return
327
312
 
328
313
  # 避免在循环中逐条打印,先拼接后统一打印
329
314
  lines = [f"知识库 '{collection_name}' 中共有 {len(sources)} 个独立文档:"]
330
315
  for i, source in enumerate(sorted(list(sources)), 1):
331
316
  lines.append(f" {i}. {source}")
332
- joined_lines = '\n'.join(lines)
333
- print(f"ℹ️ {joined_lines}")
317
+ joined_lines = "\n".join(lines)
318
+ PrettyOutput.auto_print(f"ℹ️ {joined_lines}")
334
319
 
335
320
  except Exception as e:
336
- print(f"❌ 发生错误: {e}")
321
+ PrettyOutput.auto_print(f"❌ 发生错误: {e}")
337
322
  raise typer.Exit(code=1)
338
323
 
339
324
 
@@ -379,16 +364,14 @@ def retrieve(
379
364
  use_query_rewrite=rewrite,
380
365
  )
381
366
 
382
- print(f"ℹ️ 正在为问题检索文档: '{question}'")
367
+ PrettyOutput.auto_print(f"ℹ️ 正在为问题检索文档: '{question}'")
383
368
  retrieved_docs = pipeline.retrieve_only(question, n_results=n_results)
384
369
 
385
370
  if not retrieved_docs:
386
- print("ℹ️ 未找到相关文档。")
371
+ PrettyOutput.auto_print("ℹ️ 未找到相关文档。")
387
372
  return
388
373
 
389
- print(
390
- f"✅ 成功检索到 {len(retrieved_docs)} 个文档:"
391
- )
374
+ PrettyOutput.auto_print(f"✅ 成功检索到 {len(retrieved_docs)} 个文档:")
392
375
  from jarvis.jarvis_utils.globals import console
393
376
 
394
377
  for i, doc in enumerate(retrieved_docs, 1):
@@ -399,7 +382,7 @@ def retrieve(
399
382
  console.print(Markdown(f"```\n{content}\n```"))
400
383
 
401
384
  except Exception as e:
402
- print(f"❌ 发生错误: {e}")
385
+ PrettyOutput.auto_print(f"❌ 发生错误: {e}")
403
386
  raise typer.Exit(code=1)
404
387
 
405
388
 
@@ -436,7 +419,7 @@ def query(
436
419
  ):
437
420
  """查询RAG知识库并打印答案。"""
438
421
  if model and not platform:
439
- print("❌ 错误: --model 需要指定 --platform。")
422
+ PrettyOutput.auto_print("❌ 错误: --model 需要指定 --platform。")
440
423
  raise typer.Exit(code=1)
441
424
 
442
425
  try:
@@ -458,24 +441,21 @@ def query(
458
441
  use_rerank=use_rerank,
459
442
  )
460
443
 
461
- print(f"ℹ️ 正在查询: '{question}'")
444
+ PrettyOutput.auto_print(f"ℹ️ 正在查询: '{question}'")
462
445
  answer = pipeline.query(question)
463
446
 
464
- print(f"✅ {answer}")
447
+ PrettyOutput.auto_print(f"✅ {answer}")
465
448
 
466
449
  except Exception as e:
467
- print(f"❌ 发生错误: {e}")
450
+ PrettyOutput.auto_print(f"❌ 发生错误: {e}")
468
451
  raise typer.Exit(code=1)
469
452
 
470
453
 
471
-
472
-
473
-
474
454
  def _check_rag_dependencies():
475
455
  if not is_rag_installed():
476
456
  missing = get_missing_rag_modules()
477
457
  missing_str = f"缺少依赖: {', '.join(missing)}。" if missing else ""
478
- print(
458
+ PrettyOutput.auto_print(
479
459
  f"❌ RAG依赖项未安装或不完整。{missing_str}请运行 'pip install \"jarvis-ai-assistant[rag]\"' 后重试。"
480
460
  )
481
461
  raise typer.Exit(code=1)
jarvis/jarvis_rag/embedding_interface.py ADDED
@@ -0,0 +1,39 @@
1
+ from abc import ABC
2
+ from abc import abstractmethod
3
+ from typing import List
4
+
5
+
6
+ class EmbeddingInterface(ABC):
7
+ """
8
+ 嵌入模型接口的抽象基类。
9
+
10
+ 该类定义了嵌入模型的标准接口,支持本地模型和在线模型(API)的实现。
11
+ 任何嵌入模型提供商(如HuggingFace本地模型、OpenAI API、Cohere API等)
12
+ 都应作为该接口的子类来实现。
13
+ """
14
+
15
+ @abstractmethod
16
+ def embed_documents(self, texts: List[str]) -> List[List[float]]:
17
+ """
18
+ 为文档列表计算嵌入。
19
+
20
+ 参数:
21
+ texts: 要嵌入的文档(字符串)列表。
22
+
23
+ 返回:
24
+ 一个嵌入列表,每个文档对应一个嵌入向量。
25
+ """
26
+ pass
27
+
28
+ @abstractmethod
29
+ def embed_query(self, text: str) -> List[float]:
30
+ """
31
+ 为单个查询计算嵌入。
32
+
33
+ 参数:
34
+ text: 要嵌入的查询文本。
35
+
36
+ 返回:
37
+ 查询的嵌入向量。
38
+ """
39
+ pass