jarvis-ai-assistant 0.1.173__py3-none-any.whl → 0.1.175__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of jarvis-ai-assistant might be problematic. Click here for more details.
- jarvis/__init__.py +1 -1
- jarvis/jarvis_agent/__init__.py +13 -88
- jarvis/jarvis_code_agent/code_agent.py +2 -1
- jarvis/jarvis_dev/main.py +6 -6
- jarvis/jarvis_methodology/main.py +4 -4
- jarvis/jarvis_multi_agent/__init__.py +16 -22
- jarvis/jarvis_platform/base.py +4 -3
- jarvis/jarvis_platform/kimi.py +124 -54
- jarvis/jarvis_platform/openai.py +29 -8
- jarvis/jarvis_platform/yuanbao.py +79 -37
- jarvis/jarvis_platform_manager/main.py +52 -1
- jarvis/jarvis_tools/cli/main.py +31 -12
- jarvis/jarvis_tools/registry.py +8 -8
- jarvis/jarvis_utils/config.py +1 -1
- jarvis/jarvis_utils/embedding.py +49 -108
- jarvis/jarvis_utils/methodology.py +37 -0
- jarvis/jarvis_utils/output.py +1 -20
- {jarvis_ai_assistant-0.1.173.dist-info → jarvis_ai_assistant-0.1.175.dist-info}/METADATA +1 -1
- {jarvis_ai_assistant-0.1.173.dist-info → jarvis_ai_assistant-0.1.175.dist-info}/RECORD +23 -23
- {jarvis_ai_assistant-0.1.173.dist-info → jarvis_ai_assistant-0.1.175.dist-info}/WHEEL +0 -0
- {jarvis_ai_assistant-0.1.173.dist-info → jarvis_ai_assistant-0.1.175.dist-info}/entry_points.txt +0 -0
- {jarvis_ai_assistant-0.1.173.dist-info → jarvis_ai_assistant-0.1.175.dist-info}/licenses/LICENSE +0 -0
- {jarvis_ai_assistant-0.1.173.dist-info → jarvis_ai_assistant-0.1.175.dist-info}/top_level.txt +0 -0
|
jarvis/jarvis_platform/yuanbao.py
CHANGED
|
@@ -11,6 +11,10 @@ from PIL import Image
|
|
|
11
11
|
from yaspin import yaspin
|
|
12
12
|
from yaspin.spinners import Spinners
|
|
13
13
|
from yaspin.api import Yaspin
|
|
14
|
+
from rich.live import Live
|
|
15
|
+
from rich.text import Text
|
|
16
|
+
from rich.panel import Panel
|
|
17
|
+
from rich import box
|
|
14
18
|
from jarvis.jarvis_platform.base import BasePlatform
|
|
15
19
|
from jarvis.jarvis_utils.output import OutputType, PrettyOutput
|
|
16
20
|
from jarvis.jarvis_utils.utils import while_success
|
|
@@ -387,7 +391,6 @@ class YuanbaoPlatform(BasePlatform):
|
|
|
387
391
|
|
|
388
392
|
参数:
|
|
389
393
|
message: 要发送的消息文本
|
|
390
|
-
file_list: 可选的上传和附加文件路径列表
|
|
391
394
|
|
|
392
395
|
返回:
|
|
393
396
|
模型的响应
|
|
@@ -425,7 +428,9 @@ class YuanbaoPlatform(BasePlatform):
|
|
|
425
428
|
self.multimedia = []
|
|
426
429
|
|
|
427
430
|
if self.web:
|
|
428
|
-
payload["supportFunctions"] = ["
|
|
431
|
+
payload["supportFunctions"] = ["openInternetSearch"]
|
|
432
|
+
else:
|
|
433
|
+
payload["supportFunctions"] = ["autoInternetSearch"]
|
|
429
434
|
|
|
430
435
|
# 添加系统消息(如果是第一次对话)
|
|
431
436
|
if self.first_chat and self.system_message:
|
|
@@ -449,45 +454,82 @@ class YuanbaoPlatform(BasePlatform):
|
|
|
449
454
|
|
|
450
455
|
full_response = ""
|
|
451
456
|
is_text_block = False
|
|
457
|
+
thinking_content = ""
|
|
452
458
|
|
|
453
|
-
#
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
#
|
|
467
|
-
if
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
459
|
+
# 使用Rich的Live组件来实时展示更新
|
|
460
|
+
if not self.suppress_output:
|
|
461
|
+
text_content = Text()
|
|
462
|
+
panel = Panel(text_content, title=f"[bold blue]{self.model_name}[/bold blue]",
|
|
463
|
+
subtitle="思考中...", border_style="blue", box=box.ROUNDED)
|
|
464
|
+
with Live(panel, refresh_per_second=3, transient=False) as live:
|
|
465
|
+
# 处理SSE流响应
|
|
466
|
+
for line in response.iter_lines():
|
|
467
|
+
if not line:
|
|
468
|
+
continue
|
|
469
|
+
|
|
470
|
+
line_str = line.decode('utf-8')
|
|
471
|
+
|
|
472
|
+
# SSE格式的行通常以"data: "开头
|
|
473
|
+
if line_str.startswith("data: "):
|
|
474
|
+
try:
|
|
475
|
+
data_str = line_str[6:] # 移除"data: "前缀
|
|
476
|
+
data = json.loads(data_str)
|
|
477
|
+
|
|
478
|
+
# 处理文本类型的消息
|
|
479
|
+
if data.get("type") == "text":
|
|
480
|
+
is_text_block = True
|
|
481
|
+
msg = data.get("msg", "")
|
|
482
|
+
if msg:
|
|
483
|
+
full_response += msg
|
|
484
|
+
text_content.append(msg)
|
|
485
|
+
panel.subtitle = "正在回答..."
|
|
486
|
+
live.update(panel)
|
|
487
|
+
|
|
488
|
+
# 处理思考中的消息
|
|
489
|
+
elif data.get("type") == "think":
|
|
490
|
+
think_content = data.get("content", "")
|
|
491
|
+
if think_content:
|
|
492
|
+
thinking_content = think_content
|
|
493
|
+
panel.subtitle = f"思考中: {thinking_content}"
|
|
494
|
+
live.update(panel)
|
|
495
|
+
|
|
496
|
+
except json.JSONDecodeError:
|
|
497
|
+
pass
|
|
498
|
+
|
|
499
|
+
# 检测结束标志
|
|
500
|
+
elif line_str == "data: [DONE]":
|
|
501
|
+
break
|
|
502
|
+
|
|
503
|
+
# 显示对话完成状态
|
|
504
|
+
panel.subtitle = "[bold green]对话完成[/bold green]"
|
|
505
|
+
live.update(panel)
|
|
506
|
+
else:
|
|
507
|
+
# 如果禁止输出,则静默处理
|
|
508
|
+
for line in response.iter_lines():
|
|
509
|
+
if not line:
|
|
510
|
+
continue
|
|
481
511
|
|
|
482
|
-
|
|
483
|
-
pass
|
|
512
|
+
line_str = line.decode('utf-8')
|
|
484
513
|
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
514
|
+
# SSE格式的行通常以"data: "开头
|
|
515
|
+
if line_str.startswith("data: "):
|
|
516
|
+
try:
|
|
517
|
+
data_str = line_str[6:] # 移除"data: "前缀
|
|
518
|
+
data = json.loads(data_str)
|
|
488
519
|
|
|
489
|
-
|
|
490
|
-
|
|
520
|
+
# 处理文本类型的消息
|
|
521
|
+
if data.get("type") == "text":
|
|
522
|
+
is_text_block = True
|
|
523
|
+
msg = data.get("msg", "")
|
|
524
|
+
if msg:
|
|
525
|
+
full_response += msg
|
|
526
|
+
|
|
527
|
+
except json.JSONDecodeError:
|
|
528
|
+
pass
|
|
529
|
+
|
|
530
|
+
# 检测结束标志
|
|
531
|
+
elif line_str == "data: [DONE]":
|
|
532
|
+
break
|
|
491
533
|
|
|
492
534
|
return full_response
|
|
493
535
|
|
|
jarvis/jarvis_platform_manager/main.py
CHANGED
|
@@ -51,6 +51,7 @@ def list_platforms():
|
|
|
51
51
|
def chat_with_model(platform_name: str, model_name: str):
|
|
52
52
|
"""Chat with specified platform and model"""
|
|
53
53
|
registry = PlatformRegistry.get_global_platform_registry()
|
|
54
|
+
conversation_history = [] # 存储对话记录
|
|
54
55
|
|
|
55
56
|
# Create platform instance
|
|
56
57
|
platform = registry.create_platform(platform_name)
|
|
@@ -63,12 +64,13 @@ def chat_with_model(platform_name: str, model_name: str):
|
|
|
63
64
|
platform.set_model_name(model_name)
|
|
64
65
|
platform.set_suppress_output(False)
|
|
65
66
|
PrettyOutput.print(f"连接到 {platform_name} 平台 {model_name} 模型", OutputType.SUCCESS)
|
|
66
|
-
PrettyOutput.print("可用命令: /bye - 退出聊天, /clear - 清除会话, /upload - 上传文件, /shell - 执行shell
|
|
67
|
+
PrettyOutput.print("可用命令: /bye - 退出聊天, /clear - 清除会话, /upload - 上传文件, /shell - 执行shell命令, /save - 保存当前对话, /saveall - 保存所有对话", OutputType.INFO)
|
|
67
68
|
|
|
68
69
|
# Start conversation loop
|
|
69
70
|
while True:
|
|
70
71
|
# Get user input
|
|
71
72
|
user_input = get_multiline_input("")
|
|
73
|
+
conversation_history.append({"role": "user", "content": user_input}) # 记录用户输入
|
|
72
74
|
|
|
73
75
|
# Check if input is cancelled
|
|
74
76
|
if user_input.strip() == "/bye":
|
|
@@ -84,6 +86,7 @@ def chat_with_model(platform_name: str, model_name: str):
|
|
|
84
86
|
try:
|
|
85
87
|
platform.reset()
|
|
86
88
|
platform.set_model_name(model_name) # Reinitialize session
|
|
89
|
+
conversation_history = [] # 重置对话记录
|
|
87
90
|
PrettyOutput.print("会话已清除", OutputType.SUCCESS)
|
|
88
91
|
except Exception as e:
|
|
89
92
|
PrettyOutput.print(f"清除会话失败: {str(e)}", OutputType.ERROR)
|
|
@@ -110,6 +113,52 @@ def chat_with_model(platform_name: str, model_name: str):
|
|
|
110
113
|
PrettyOutput.print(f"上传文件失败: {str(e)}", OutputType.ERROR)
|
|
111
114
|
continue
|
|
112
115
|
|
|
116
|
+
# Check if it is a save command
|
|
117
|
+
if user_input.strip().startswith("/save"):
|
|
118
|
+
try:
|
|
119
|
+
file_path = user_input.strip()[5:].strip()
|
|
120
|
+
if not file_path:
|
|
121
|
+
PrettyOutput.print("请指定保存文件名,例如: /save last_message.txt", OutputType.WARNING)
|
|
122
|
+
continue
|
|
123
|
+
|
|
124
|
+
# Remove quotes if present
|
|
125
|
+
if (file_path.startswith('"') and file_path.endswith('"')) or (file_path.startswith("'") and file_path.endswith("'")):
|
|
126
|
+
file_path = file_path[1:-1]
|
|
127
|
+
|
|
128
|
+
# Write last message content to file
|
|
129
|
+
if conversation_history:
|
|
130
|
+
with open(file_path, 'w', encoding='utf-8') as f:
|
|
131
|
+
last_entry = conversation_history[-1]
|
|
132
|
+
f.write(f"{last_entry['content']}\n")
|
|
133
|
+
PrettyOutput.print(f"最后一条消息内容已保存到 {file_path}", OutputType.SUCCESS)
|
|
134
|
+
else:
|
|
135
|
+
PrettyOutput.print("没有可保存的消息", OutputType.WARNING)
|
|
136
|
+
except Exception as e:
|
|
137
|
+
PrettyOutput.print(f"保存消息失败: {str(e)}", OutputType.ERROR)
|
|
138
|
+
continue
|
|
139
|
+
|
|
140
|
+
# Check if it is a saveall command
|
|
141
|
+
if user_input.strip().startswith("/saveall"):
|
|
142
|
+
try:
|
|
143
|
+
file_path = user_input.strip()[8:].strip()
|
|
144
|
+
if not file_path:
|
|
145
|
+
PrettyOutput.print("请指定保存文件名,例如: /saveall all_conversations.txt", OutputType.WARNING)
|
|
146
|
+
continue
|
|
147
|
+
|
|
148
|
+
# Remove quotes if present
|
|
149
|
+
if (file_path.startswith('"') and file_path.endswith('"')) or (file_path.startswith("'") and file_path.endswith("'")):
|
|
150
|
+
file_path = file_path[1:-1]
|
|
151
|
+
|
|
152
|
+
# Write full conversation history to file
|
|
153
|
+
with open(file_path, 'w', encoding='utf-8') as f:
|
|
154
|
+
for entry in conversation_history:
|
|
155
|
+
f.write(f"{entry['role']}: {entry['content']}\n\n")
|
|
156
|
+
|
|
157
|
+
PrettyOutput.print(f"所有对话已保存到 {file_path}", OutputType.SUCCESS)
|
|
158
|
+
except Exception as e:
|
|
159
|
+
PrettyOutput.print(f"保存所有对话失败: {str(e)}", OutputType.ERROR)
|
|
160
|
+
continue
|
|
161
|
+
|
|
113
162
|
# Check if it is a shell command
|
|
114
163
|
if user_input.strip().startswith("/shell"):
|
|
115
164
|
try:
|
|
@@ -133,6 +182,8 @@ def chat_with_model(platform_name: str, model_name: str):
|
|
|
133
182
|
response = platform.chat_until_success(user_input)
|
|
134
183
|
if not response:
|
|
135
184
|
PrettyOutput.print("没有有效的回复", OutputType.WARNING)
|
|
185
|
+
else:
|
|
186
|
+
conversation_history.append({"role": "assistant", "content": response}) # 记录模型回复
|
|
136
187
|
|
|
137
188
|
except Exception as e:
|
|
138
189
|
PrettyOutput.print(f"聊天失败: {str(e)}", OutputType.ERROR)
|
jarvis/jarvis_tools/cli/main.py
CHANGED
|
@@ -6,7 +6,26 @@ from jarvis.jarvis_utils.utils import init_env
|
|
|
6
6
|
|
|
7
7
|
|
|
8
8
|
def main() -> int:
|
|
9
|
-
"""
|
|
9
|
+
"""
|
|
10
|
+
命令行工具入口,提供工具列表查看和工具调用功能
|
|
11
|
+
|
|
12
|
+
功能:
|
|
13
|
+
1. 列出所有可用工具 (list命令)
|
|
14
|
+
2. 调用指定工具 (call命令)
|
|
15
|
+
|
|
16
|
+
参数:
|
|
17
|
+
通过命令行参数传递,包括:
|
|
18
|
+
- list: 列出工具
|
|
19
|
+
--json: 以JSON格式输出
|
|
20
|
+
--detailed: 显示详细信息
|
|
21
|
+
- call: 调用工具
|
|
22
|
+
tool_name: 工具名称
|
|
23
|
+
--args: 工具参数(JSON格式)
|
|
24
|
+
--args-file: 从文件加载工具参数
|
|
25
|
+
|
|
26
|
+
返回值:
|
|
27
|
+
int: 0表示成功,非0表示错误
|
|
28
|
+
"""
|
|
10
29
|
import argparse
|
|
11
30
|
import json
|
|
12
31
|
|
|
@@ -34,24 +53,24 @@ def main() -> int:
|
|
|
34
53
|
registry = ToolRegistry()
|
|
35
54
|
|
|
36
55
|
if args.command == "list":
|
|
37
|
-
tools = registry.get_all_tools()
|
|
56
|
+
tools = registry.get_all_tools() # 从注册表获取所有工具信息
|
|
38
57
|
|
|
39
58
|
if args.json:
|
|
40
59
|
if args.detailed:
|
|
41
|
-
print(json.dumps(tools, indent=2, ensure_ascii=False))
|
|
60
|
+
print(json.dumps(tools, indent=2, ensure_ascii=False)) # 输出完整JSON格式
|
|
42
61
|
else:
|
|
43
62
|
simple_tools = [
|
|
44
63
|
{"name": t["name"], "description": t["description"]} for t in tools
|
|
45
|
-
]
|
|
64
|
+
] # 简化工具信息
|
|
46
65
|
print(json.dumps(simple_tools, indent=2, ensure_ascii=False))
|
|
47
66
|
else:
|
|
48
|
-
PrettyOutput.section("可用工具列表", OutputType.SYSTEM)
|
|
67
|
+
PrettyOutput.section("可用工具列表", OutputType.SYSTEM) # 使用美化输出
|
|
49
68
|
for tool in tools:
|
|
50
69
|
print(f"\n✅ {tool['name']}")
|
|
51
70
|
print(f" 描述: {tool['description']}")
|
|
52
71
|
if args.detailed:
|
|
53
72
|
print(f" 参数:")
|
|
54
|
-
print(tool["parameters"])
|
|
73
|
+
print(tool["parameters"]) # 显示详细参数信息
|
|
55
74
|
|
|
56
75
|
elif args.command == "call":
|
|
57
76
|
tool_name = args.tool_name
|
|
@@ -63,11 +82,11 @@ def main() -> int:
|
|
|
63
82
|
print(f"可用工具: {available_tools}")
|
|
64
83
|
return 1
|
|
65
84
|
|
|
66
|
-
#
|
|
85
|
+
# 获取参数: 支持从命令行直接传入或从文件加载
|
|
67
86
|
tool_args = {}
|
|
68
87
|
if args.args:
|
|
69
88
|
try:
|
|
70
|
-
tool_args = json.loads(args.args)
|
|
89
|
+
tool_args = json.loads(args.args) # 解析JSON格式参数
|
|
71
90
|
except json.JSONDecodeError:
|
|
72
91
|
PrettyOutput.print("错误: 参数必须是有效的JSON格式", OutputType.ERROR)
|
|
73
92
|
return 1
|
|
@@ -75,14 +94,14 @@ def main() -> int:
|
|
|
75
94
|
elif args.args_file:
|
|
76
95
|
try:
|
|
77
96
|
with open(args.args_file, "r", encoding="utf-8") as f:
|
|
78
|
-
tool_args = json.load(f)
|
|
97
|
+
tool_args = json.load(f) # 从文件加载JSON参数
|
|
79
98
|
except (json.JSONDecodeError, FileNotFoundError) as e:
|
|
80
99
|
PrettyOutput.print(
|
|
81
100
|
f"错误: 无法从文件加载参数: {str(e)}", OutputType.ERROR
|
|
82
101
|
)
|
|
83
102
|
return 1
|
|
84
103
|
|
|
85
|
-
#
|
|
104
|
+
# 检查必需参数是否完整
|
|
86
105
|
required_params = tool_obj.parameters.get("required", [])
|
|
87
106
|
missing_params = [p for p in required_params if p not in tool_args]
|
|
88
107
|
|
|
@@ -98,10 +117,10 @@ def main() -> int:
|
|
|
98
117
|
print(f" - {param_name}: {desc}")
|
|
99
118
|
return 1
|
|
100
119
|
|
|
101
|
-
#
|
|
120
|
+
# 执行工具并处理结果
|
|
102
121
|
result = registry.execute_tool(tool_name, tool_args)
|
|
103
122
|
|
|
104
|
-
#
|
|
123
|
+
# 显示执行结果
|
|
105
124
|
if result["success"]:
|
|
106
125
|
PrettyOutput.section(f"工具 {tool_name} 执行成功", OutputType.SUCCESS)
|
|
107
126
|
else:
|
jarvis/jarvis_tools/registry.py
CHANGED
|
@@ -33,8 +33,8 @@ tool_call_help = f"""
|
|
|
33
33
|
want: 想要从执行结果中获取到的信息,如果工具输出内容过长,会根据此字段尝试提取有效信息
|
|
34
34
|
name: 工具名称
|
|
35
35
|
arguments:
|
|
36
|
-
|
|
37
|
-
|
|
36
|
+
param1: 值1
|
|
37
|
+
param2: 值2
|
|
38
38
|
{ct("TOOL_CALL")}
|
|
39
39
|
</format>
|
|
40
40
|
|
|
@@ -49,7 +49,7 @@ arguments:
|
|
|
49
49
|
<rule>
|
|
50
50
|
### 2. 严格遵守格式
|
|
51
51
|
- 完全按照上述格式
|
|
52
|
-
- 使用正确的YAML
|
|
52
|
+
- 使用正确的YAML格式,2个空格作为缩进
|
|
53
53
|
- 包含所有必需参数
|
|
54
54
|
</rule>
|
|
55
55
|
|
|
@@ -72,15 +72,15 @@ arguments:
|
|
|
72
72
|
|
|
73
73
|
<string_format>
|
|
74
74
|
# 📝 字符串参数格式
|
|
75
|
-
始终使用 |
|
|
75
|
+
始终使用 |2 语法表示字符串参数,防止多行字符串行首空格引起歧义:
|
|
76
76
|
|
|
77
77
|
{ot("TOOL_CALL")}
|
|
78
78
|
want: 当前的git状态,期望获取xxx的提交记录
|
|
79
79
|
name: execute_script
|
|
80
80
|
arguments:
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
81
|
+
interpreter: bash
|
|
82
|
+
script_cotent: |2
|
|
83
|
+
git status --porcelain
|
|
84
84
|
{ct("TOOL_CALL")}
|
|
85
85
|
</string_format>
|
|
86
86
|
|
|
@@ -96,7 +96,7 @@ arguments:
|
|
|
96
96
|
<common_errors>
|
|
97
97
|
# ⚠️ 常见错误
|
|
98
98
|
- 同时调用多个工具
|
|
99
|
-
- 字符串参数缺少 |
|
|
99
|
+
- 字符串参数缺少 |2
|
|
100
100
|
- 假设工具结果
|
|
101
101
|
- 创建虚构对话
|
|
102
102
|
- 在没有所需信息的情况下继续
|
jarvis/jarvis_utils/config.py
CHANGED
jarvis/jarvis_utils/embedding.py
CHANGED
|
@@ -20,131 +20,72 @@ def get_context_token_count(text: str) -> int:
|
|
|
20
20
|
int: 文本中的token数量
|
|
21
21
|
"""
|
|
22
22
|
try:
|
|
23
|
-
# 使用擅长处理通用文本的快速分词器
|
|
24
23
|
tokenizer = load_tokenizer()
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
24
|
+
# 分批处理长文本,确保不超过模型最大长度
|
|
25
|
+
total_tokens = 0
|
|
26
|
+
chunk_size = 100 # 每次处理100个字符,避免超过模型最大长度(考虑到中文字符可能被编码成多个token)
|
|
27
|
+
for i in range(0, len(text), chunk_size):
|
|
28
|
+
chunk = text[i:i + chunk_size]
|
|
29
|
+
tokens = tokenizer.encode(chunk) # type: ignore
|
|
30
|
+
total_tokens += len(tokens)
|
|
31
|
+
return total_tokens
|
|
28
32
|
except Exception as e:
|
|
29
33
|
PrettyOutput.print(f"计算token失败: {str(e)}", OutputType.WARNING)
|
|
30
|
-
# 回退到基于字符的粗略估计
|
|
31
34
|
return len(text) // 4 # 每个token大约4个字符的粗略估计
|
|
32
35
|
|
|
33
36
|
def split_text_into_chunks(text: str, max_length: int = 512, min_length: int = 50) -> List[str]:
|
|
34
|
-
"""
|
|
37
|
+
"""将文本分割成块,基于token数量进行切割。
|
|
35
38
|
|
|
36
39
|
参数:
|
|
37
40
|
text: 要分割的输入文本
|
|
38
|
-
max_length:
|
|
39
|
-
min_length:
|
|
41
|
+
max_length: 每个块的最大token数量
|
|
42
|
+
min_length: 每个块的最小token数量(除了最后一块可能较短)
|
|
40
43
|
|
|
41
44
|
返回:
|
|
42
|
-
List[str]:
|
|
45
|
+
List[str]: 文本块列表,每个块的token数量不超过max_length且不小于min_length
|
|
43
46
|
"""
|
|
44
47
|
if not text:
|
|
45
48
|
return []
|
|
46
49
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
# 初始化结束位置为最大可能长度
|
|
64
|
-
end = min(start + max_length, len(text))
|
|
65
|
-
|
|
66
|
-
# 只有当不是最后一块且结束位置等于最大长度时,才尝试寻找句子边界
|
|
67
|
-
if end < len(text) and end == start + max_length:
|
|
68
|
-
# 优先查找段落边界,这对RAG特别重要
|
|
69
|
-
paragraph_boundary = text.rfind('\n\n', start, end)
|
|
70
|
-
if paragraph_boundary > start and paragraph_boundary < end - min_length: # 确保不会切得太短
|
|
71
|
-
end = paragraph_boundary + 2
|
|
50
|
+
try:
|
|
51
|
+
chunks = []
|
|
52
|
+
current_chunk = ""
|
|
53
|
+
current_tokens = 0
|
|
54
|
+
|
|
55
|
+
# 按较大的块处理文本,避免破坏token边界
|
|
56
|
+
chunk_size = 50 # 每次处理50个字符
|
|
57
|
+
for i in range(0, len(text), chunk_size):
|
|
58
|
+
chunk = text[i:i + chunk_size]
|
|
59
|
+
chunk_tokens = get_context_token_count(chunk)
|
|
60
|
+
|
|
61
|
+
# 如果当前块加上新块会超过最大长度,且当前块已经达到最小长度,则保存当前块
|
|
62
|
+
if current_tokens + chunk_tokens > max_length and current_tokens >= min_length:
|
|
63
|
+
chunks.append(current_chunk)
|
|
64
|
+
current_chunk = chunk
|
|
65
|
+
current_tokens = chunk_tokens
|
|
72
66
|
else:
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
# 如果没找到主要标点,再找次要标点(分号、冒号等)
|
|
88
|
-
if not found_boundary:
|
|
89
|
-
for i in range(end-1, max(start, end-search_range), -1):
|
|
90
|
-
if text[i] in secondary_punctuation:
|
|
91
|
-
best_boundary = i
|
|
92
|
-
found_boundary = True
|
|
93
|
-
break
|
|
94
|
-
|
|
95
|
-
# 最后考虑逗号和其他可能的边界
|
|
96
|
-
if not found_boundary:
|
|
97
|
-
for i in range(end-1, max(start, end-search_range), -1):
|
|
98
|
-
if text[i] in tertiary_punctuation:
|
|
99
|
-
best_boundary = i
|
|
100
|
-
found_boundary = True
|
|
101
|
-
break
|
|
102
|
-
|
|
103
|
-
# 如果找到了合适的边界且不会导致太短的块,使用它
|
|
104
|
-
if found_boundary and (best_boundary - start) >= min_length:
|
|
105
|
-
end = best_boundary + 1
|
|
106
|
-
|
|
107
|
-
# 添加当前块,并确保删除开头和结尾的空白字符
|
|
108
|
-
chunk = text[start:end].strip()
|
|
109
|
-
if chunk and len(chunk) >= min_length: # 只添加符合最小长度的非空块
|
|
110
|
-
chunks.append(chunk)
|
|
111
|
-
elif chunk and not chunks: # 如果是第一个块且小于最小长度,也添加它
|
|
112
|
-
chunks.append(chunk)
|
|
113
|
-
elif chunk: # 如果块太小,尝试与前一个块合并
|
|
114
|
-
if chunks:
|
|
115
|
-
if len(chunks[-1]) + len(chunk) <= max_length * 1.1: # 允许略微超过最大长度
|
|
116
|
-
chunks[-1] = chunks[-1] + " " + chunk
|
|
67
|
+
current_chunk += chunk
|
|
68
|
+
current_tokens += chunk_tokens
|
|
69
|
+
|
|
70
|
+
# 处理最后一个块
|
|
71
|
+
if current_chunk:
|
|
72
|
+
if current_tokens >= min_length:
|
|
73
|
+
chunks.append(current_chunk)
|
|
74
|
+
elif chunks: # 如果最后一个块太短,尝试与前面的块合并
|
|
75
|
+
last_chunk = chunks[-1]
|
|
76
|
+
combined = last_chunk + current_chunk
|
|
77
|
+
combined_tokens = get_context_token_count(combined)
|
|
78
|
+
if combined_tokens <= max_length:
|
|
79
|
+
chunks[-1] = combined
|
|
117
80
|
else:
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
#
|
|
125
|
-
|
|
126
|
-
next_start = start + max(1, min_length // 2)
|
|
127
|
-
|
|
128
|
-
start = next_start
|
|
129
|
-
|
|
130
|
-
# 最后检查是否有太短的块,尝试合并相邻的短块
|
|
131
|
-
if len(chunks) > 1:
|
|
132
|
-
merged_chunks = []
|
|
133
|
-
i = 0
|
|
134
|
-
while i < len(chunks):
|
|
135
|
-
current = chunks[i]
|
|
136
|
-
# 如果当前块太短且不是最后一个块,尝试与下一个合并
|
|
137
|
-
if len(current) < min_length and i < len(chunks) - 1:
|
|
138
|
-
next_chunk = chunks[i + 1]
|
|
139
|
-
if len(current) + len(next_chunk) <= max_length * 1.1:
|
|
140
|
-
merged_chunks.append(current + " " + next_chunk)
|
|
141
|
-
i += 2 # 跳过下一个块
|
|
142
|
-
continue
|
|
143
|
-
merged_chunks.append(current)
|
|
144
|
-
i += 1
|
|
145
|
-
chunks = merged_chunks
|
|
146
|
-
|
|
147
|
-
return chunks
|
|
81
|
+
chunks.append(current_chunk)
|
|
82
|
+
|
|
83
|
+
return chunks
|
|
84
|
+
|
|
85
|
+
except Exception as e:
|
|
86
|
+
PrettyOutput.print(f"文本分割失败: {str(e)}", OutputType.WARNING)
|
|
87
|
+
# 发生错误时回退到简单的字符分割
|
|
88
|
+
return [text[i:i + max_length] for i in range(0, len(text), max_length)]
|
|
148
89
|
|
|
149
90
|
|
|
150
91
|
@functools.lru_cache(maxsize=1)
|
|
jarvis/jarvis_utils/methodology.py
CHANGED
|
@@ -12,6 +12,7 @@ import json
|
|
|
12
12
|
import tempfile
|
|
13
13
|
from typing import Any, Dict, Optional
|
|
14
14
|
|
|
15
|
+
from jarvis.jarvis_platform.base import BasePlatform
|
|
15
16
|
from jarvis.jarvis_utils.config import get_data_dir
|
|
16
17
|
from jarvis.jarvis_utils.output import PrettyOutput, OutputType
|
|
17
18
|
from jarvis.jarvis_platform.registry import PlatformRegistry
|
|
@@ -92,6 +93,42 @@ def _create_methodology_temp_file(methodologies: Dict[str, str]) -> Optional[str
|
|
|
92
93
|
PrettyOutput.print(f"创建方法论临时文件失败: {str(e)}", OutputType.ERROR)
|
|
93
94
|
return None
|
|
94
95
|
|
|
96
|
+
def upload_methodology(platform: BasePlatform) -> bool:
|
|
97
|
+
"""
|
|
98
|
+
上传方法论文件到指定平台
|
|
99
|
+
|
|
100
|
+
参数:
|
|
101
|
+
platform: 平台实例,需实现upload_files方法
|
|
102
|
+
|
|
103
|
+
返回:
|
|
104
|
+
bool: 上传是否成功
|
|
105
|
+
"""
|
|
106
|
+
methodology_dir = _get_methodology_directory()
|
|
107
|
+
if not os.path.exists(methodology_dir):
|
|
108
|
+
PrettyOutput.print("方法论文档不存在", OutputType.WARNING)
|
|
109
|
+
return False
|
|
110
|
+
|
|
111
|
+
methodologies = _load_all_methodologies()
|
|
112
|
+
if not methodologies:
|
|
113
|
+
PrettyOutput.print("没有可用的方法论文档", OutputType.WARNING)
|
|
114
|
+
return False
|
|
115
|
+
|
|
116
|
+
temp_file_path = _create_methodology_temp_file(methodologies)
|
|
117
|
+
if not temp_file_path:
|
|
118
|
+
return False
|
|
119
|
+
|
|
120
|
+
try:
|
|
121
|
+
if hasattr(platform, 'upload_files'):
|
|
122
|
+
return platform.upload_files([temp_file_path])
|
|
123
|
+
return False
|
|
124
|
+
finally:
|
|
125
|
+
if temp_file_path and os.path.exists(temp_file_path):
|
|
126
|
+
try:
|
|
127
|
+
os.remove(temp_file_path)
|
|
128
|
+
except Exception:
|
|
129
|
+
pass
|
|
130
|
+
|
|
131
|
+
|
|
95
132
|
def load_methodology(user_input: str, tool_registery: Optional[Any] = None) -> str:
|
|
96
133
|
"""
|
|
97
134
|
加载方法论并上传到大模型。
|
jarvis/jarvis_utils/output.py
CHANGED
|
@@ -204,26 +204,7 @@ class PrettyOutput:
|
|
|
204
204
|
console.print()
|
|
205
205
|
console.print(panel)
|
|
206
206
|
console.print()
|
|
207
|
-
|
|
208
|
-
def print_stream(text: str, is_thinking: bool = False):
|
|
209
|
-
"""
|
|
210
|
-
打印流式输出,不带换行符。
|
|
211
|
-
|
|
212
|
-
参数:
|
|
213
|
-
text: 要打印的文本
|
|
214
|
-
"""
|
|
215
|
-
style = RichStyle(color="bright_cyan", bold=True, frame=True, meta={"icon": "🤖"})
|
|
216
|
-
if is_thinking:
|
|
217
|
-
style = RichStyle(color="grey58", italic=True, frame=True, meta={"icon": "🤖"})
|
|
218
|
-
console.print(text, style=style, end="")
|
|
219
|
-
@staticmethod
|
|
220
|
-
def print_stream_end():
|
|
221
|
-
"""
|
|
222
|
-
结束流式输出,带换行符。
|
|
223
|
-
"""
|
|
224
|
-
end_style = PrettyOutput._get_style(OutputType.SUCCESS)
|
|
225
|
-
console.print("\n", style=end_style)
|
|
226
|
-
console.file.flush()
|
|
207
|
+
|
|
227
208
|
@staticmethod
|
|
228
209
|
def _get_style(output_type: OutputType) -> RichStyle:
|
|
229
210
|
"""
|