beswarm 0.1.57__py3-none-any.whl → 0.1.59__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
beswarm/aient/setup.py CHANGED
@@ -4,7 +4,7 @@ from setuptools import setup, find_packages
4
4
 
5
5
  setup(
6
6
  name="aient",
7
- version="1.1.12",
7
+ version="1.1.14",
8
8
  description="Aient: The Awakening of Agent.",
9
9
  long_description=Path.open(Path("README.md"), encoding="utf-8").read(),
10
10
  long_description_content_type="text/markdown",
@@ -48,6 +48,7 @@ async def get_gemini_payload(request, engine, provider, api_key=None):
48
48
 
49
49
  messages = []
50
50
  systemInstruction = None
51
+ system_prompt = ""
51
52
  function_arguments = None
52
53
  for msg in request.messages:
53
54
  if msg.role == "assistant":
@@ -102,7 +103,8 @@ async def get_gemini_payload(request, engine, provider, api_key=None):
102
103
  messages.append({"role": msg.role, "parts": content})
103
104
  elif msg.role == "system":
104
105
  content[0]["text"] = re.sub(r"_+", "_", content[0]["text"])
105
- systemInstruction = {"parts": content}
106
+ system_prompt = system_prompt + "\n\n" + content[0]["text"]
107
+ systemInstruction = {"parts": [{"text": system_prompt}]}
106
108
 
107
109
  if any(off_model in original_model for off_model in gemini_max_token_65k_models):
108
110
  safety_settings = "OFF"
@@ -212,23 +214,35 @@ async def get_gemini_payload(request, engine, provider, api_key=None):
212
214
  else:
213
215
  payload["generationConfig"]["maxOutputTokens"] = 8192
214
216
 
215
- # 从请求模型名中检测思考预算设置
216
- m = re.match(r".*-think-(-?\d+)", request.model)
217
- if m:
218
- try:
219
- val = int(m.group(1))
220
- if val < 0:
221
- val = 0
222
- elif val > 24576:
223
- val = 24576
224
- payload["generationConfig"]["thinkingConfig"] = {"thinkingBudget": val}
225
- except ValueError:
226
- # 如果转换为整数失败,忽略思考预算设置
227
- pass
228
-
229
- # 检测search标签
230
- if request.model.endswith("-search"):
231
- payload["tools"] = [{"googleSearch": {}}]
217
+ if "gemini-2.5" in original_model:
218
+ payload["generationConfig"]["thinkingConfig"] = {
219
+ "includeThoughts": True,
220
+ }
221
+ # 从请求模型名中检测思考预算设置
222
+ m = re.match(r".*-think-(-?\d+)", request.model)
223
+ if m:
224
+ try:
225
+ val = int(m.group(1))
226
+ if val < 0:
227
+ val = 0
228
+ elif val > 24576:
229
+ val = 24576
230
+ payload["generationConfig"]["thinkingConfig"]["thinkingBudget"] = val
231
+ except ValueError:
232
+ # 如果转换为整数失败,忽略思考预算设置
233
+ pass
234
+
235
+ # # 检测search标签
236
+ # if request.model.endswith("-search"):
237
+ # payload["tools"] = [{"googleSearch": {}}]
238
+
239
+ if safe_get(provider, "preferences", "post_body_parameter_overrides", default=None):
240
+ for key, value in safe_get(provider, "preferences", "post_body_parameter_overrides", default={}).items():
241
+ if key == request.model:
242
+ for k, v in value.items():
243
+ payload[k] = v
244
+ elif all(_model not in request.model.lower() for _model in ["gemini", "gpt", "claude"]):
245
+ payload[key] = value
232
246
 
233
247
  return url, headers, payload
234
248
 
@@ -303,16 +317,16 @@ async def get_vertex_gemini_payload(request, engine, provider, api_key=None):
303
317
  gemini_stream = "generateContent"
304
318
  model_dict = get_model_dict(provider)
305
319
  original_model = model_dict[request.model]
306
- search_tool = None
320
+ # search_tool = None
307
321
 
308
322
  # https://cloud.google.com/vertex-ai/generative-ai/docs/models/gemini/2-0-flash?hl=zh-cn
309
323
  pro_models = ["gemini-2.5", "gemini-2.0"]
310
324
  if any(pro_model in original_model for pro_model in pro_models):
311
325
  location = gemini2
312
- search_tool = {"googleSearch": {}}
326
+ # search_tool = {"googleSearch": {}}
313
327
  else:
314
328
  location = gemini1
315
- search_tool = {"googleSearchRetrieval": {}}
329
+ # search_tool = {"googleSearchRetrieval": {}}
316
330
 
317
331
  if "google-vertex-ai" in provider.get("base_url", ""):
318
332
  url = provider.get("base_url").rstrip('/') + "/v1/projects/{PROJECT_ID}/locations/{LOCATION}/publishers/google/models/{MODEL_ID}:{stream}".format(
@@ -334,6 +348,7 @@ async def get_vertex_gemini_payload(request, engine, provider, api_key=None):
334
348
 
335
349
  messages = []
336
350
  systemInstruction = None
351
+ system_prompt = ""
337
352
  function_arguments = None
338
353
  for msg in request.messages:
339
354
  if msg.role == "assistant":
@@ -387,7 +402,8 @@ async def get_vertex_gemini_payload(request, engine, provider, api_key=None):
387
402
  elif msg.role != "system":
388
403
  messages.append({"role": msg.role, "parts": content})
389
404
  elif msg.role == "system":
390
- systemInstruction = {"parts": content}
405
+ system_prompt = system_prompt + "\n\n" + content[0]["text"]
406
+ systemInstruction = {"parts": [{"text": system_prompt}]}
391
407
 
392
408
  if any(off_model in original_model for off_model in gemini_max_token_65k_models):
393
409
  safety_settings = "OFF"
@@ -469,8 +485,34 @@ async def get_vertex_gemini_payload(request, engine, provider, api_key=None):
469
485
  else:
470
486
  payload["generationConfig"]["max_output_tokens"] = 8192
471
487
 
472
- if request.model.endswith("-search"):
473
- payload["tools"] = [search_tool]
488
+ if "gemini-2.5" in original_model:
489
+ payload["generationConfig"]["thinkingConfig"] = {
490
+ "includeThoughts": True,
491
+ }
492
+ # 从请求模型名中检测思考预算设置
493
+ m = re.match(r".*-think-(-?\d+)", request.model)
494
+ if m:
495
+ try:
496
+ val = int(m.group(1))
497
+ if val < 0:
498
+ val = 0
499
+ elif val > 24576:
500
+ val = 24576
501
+ payload["generationConfig"]["thinkingConfig"]["thinkingBudget"] = val
502
+ except ValueError:
503
+ # 如果转换为整数失败,忽略思考预算设置
504
+ pass
505
+
506
+ # if request.model.endswith("-search"):
507
+ # payload["tools"] = [search_tool]
508
+
509
+ if safe_get(provider, "preferences", "post_body_parameter_overrides", default=None):
510
+ for key, value in safe_get(provider, "preferences", "post_body_parameter_overrides", default={}).items():
511
+ if key == request.model:
512
+ for k, v in value.items():
513
+ payload[k] = v
514
+ elif all(_model not in request.model.lower() for _model in ["gemini", "gpt", "claude"]):
515
+ payload[key] = value
474
516
 
475
517
  return url, headers, payload
476
518
 
@@ -893,6 +935,9 @@ async def get_gpt_payload(request, engine, provider, api_key=None):
893
935
  headers['Authorization'] = f"Bearer {api_key}"
894
936
 
895
937
  url = provider['base_url']
938
+ if "openrouter.ai" in url:
939
+ headers['HTTP-Referer'] = "https://github.com/yym68686/uni-api"
940
+ headers['X-Title'] = "Uni API"
896
941
 
897
942
  messages = []
898
943
  for msg in request.messages:
@@ -1010,7 +1055,11 @@ async def get_gpt_payload(request, engine, provider, api_key=None):
1010
1055
 
1011
1056
  if safe_get(provider, "preferences", "post_body_parameter_overrides", default=None):
1012
1057
  for key, value in safe_get(provider, "preferences", "post_body_parameter_overrides", default={}).items():
1013
- payload[key] = value
1058
+ if key == request.model:
1059
+ for k, v in value.items():
1060
+ payload[k] = v
1061
+ elif all(_model not in request.model.lower() for _model in ["gemini", "gpt", "claude"]):
1062
+ payload[key] = value
1014
1063
 
1015
1064
  return url, headers, payload
1016
1065
 
@@ -1104,7 +1153,11 @@ async def get_azure_payload(request, engine, provider, api_key=None):
1104
1153
 
1105
1154
  if safe_get(provider, "preferences", "post_body_parameter_overrides", default=None):
1106
1155
  for key, value in safe_get(provider, "preferences", "post_body_parameter_overrides", default={}).items():
1107
- payload[key] = value
1156
+ if key == request.model:
1157
+ for k, v in value.items():
1158
+ payload[k] = v
1159
+ elif all(_model not in request.model.lower() for _model in ["gemini", "gpt", "claude"]):
1160
+ payload[key] = value
1108
1161
 
1109
1162
  return url, headers, payload
1110
1163
 
@@ -1118,6 +1171,9 @@ async def get_openrouter_payload(request, engine, provider, api_key=None):
1118
1171
  headers['Authorization'] = f"Bearer {api_key}"
1119
1172
 
1120
1173
  url = provider['base_url']
1174
+ if "openrouter.ai" in url:
1175
+ headers['HTTP-Referer'] = "https://github.com/yym68686/uni-api"
1176
+ headers['X-Title'] = "Uni API"
1121
1177
 
1122
1178
  messages = []
1123
1179
  for msg in request.messages:
@@ -1165,6 +1221,14 @@ async def get_openrouter_payload(request, engine, provider, api_key=None):
1165
1221
  if field not in miss_fields and value is not None:
1166
1222
  payload[field] = value
1167
1223
 
1224
+ if safe_get(provider, "preferences", "post_body_parameter_overrides", default=None):
1225
+ for key, value in safe_get(provider, "preferences", "post_body_parameter_overrides", default={}).items():
1226
+ if key == request.model:
1227
+ for k, v in value.items():
1228
+ payload[k] = v
1229
+ elif all(_model not in request.model.lower() for _model in ["gemini", "gpt", "claude"]):
1230
+ payload[key] = value
1231
+
1168
1232
  return url, headers, payload
1169
1233
 
1170
1234
  async def get_cohere_payload(request, engine, provider, api_key=None):
@@ -1433,9 +1497,13 @@ async def get_claude_payload(request, engine, provider, api_key=None):
1433
1497
  message_index = message_index + 1
1434
1498
 
1435
1499
  if "claude-3-7-sonnet" in original_model:
1436
- max_tokens = 20000
1500
+ max_tokens = 128000
1437
1501
  elif "claude-3-5-sonnet" in original_model:
1438
1502
  max_tokens = 8192
1503
+ elif "claude-sonnet-4" in original_model:
1504
+ max_tokens = 64000
1505
+ elif "claude-opus-4" in original_model:
1506
+ max_tokens = 32000
1439
1507
  else:
1440
1508
  max_tokens = 4096
1441
1509
 
@@ -535,15 +535,25 @@ async def fetch_response(client, url, headers, payload, engine, model):
535
535
  # print("parsed_data", json.dumps(parsed_data, indent=4, ensure_ascii=False))
536
536
  content = ""
537
537
  reasoning_content = ""
538
- for item in parsed_data:
539
- chunk = safe_get(item, "candidates", 0, "content", "parts", 0, "text")
540
- is_think = safe_get(item, "candidates", 0, "content", "parts", 0, "thought", default=False)
538
+ parts_list = safe_get(parsed_data, 0, "candidates", 0, "content", "parts", default=[])
539
+ for item in parts_list:
540
+ chunk = safe_get(item, "text")
541
+ is_think = safe_get(item, "thought", default=False)
541
542
  # logger.info(f"chunk: {repr(chunk)}")
542
543
  if chunk:
543
544
  if is_think:
544
545
  reasoning_content += chunk
545
546
  else:
546
547
  content += chunk
548
+ # for item in parsed_data:
549
+ # chunk = safe_get(item, "candidates", 0, "content", "parts", 0, "text")
550
+ # is_think = safe_get(item, "candidates", 0, "content", "parts", 0, "thought", default=False)
551
+ # # logger.info(f"chunk: {repr(chunk)}")
552
+ # if chunk:
553
+ # if is_think:
554
+ # reasoning_content += chunk
555
+ # else:
556
+ # content += chunk
547
557
 
548
558
  usage_metadata = safe_get(parsed_data, -1, "usageMetadata")
549
559
  prompt_tokens = safe_get(usage_metadata, "promptTokenCount", default=0)
@@ -96,6 +96,7 @@ def get_engine(provider, endpoint=None, original_model=""):
96
96
  and "o3" not in original_model \
97
97
  and "o4" not in original_model \
98
98
  and "gemini" not in original_model \
99
+ and "gemma" not in original_model \
99
100
  and "learnlm" not in original_model \
100
101
  and "grok" not in original_model \
101
102
  and parsed_url.netloc != 'api.cloudflare.com' \
@@ -172,7 +172,8 @@ def excute_command(command):
172
172
  process.stderr.close()
173
173
 
174
174
  new_output_lines = []
175
- output_lines = "".join(output_lines).strip().replace("\\u001b[A", "").replace("\\r", "\r").replace("\\\\", "").replace("\\n", "\n").replace("\r", "+++").replace("\n", "+++")
175
+ output_lines = "".join(output_lines).strip().replace("\\r", "\r").replace("\\\\", "").replace("\\n", "\n").replace("\r", "+++").replace("\n", "+++")
176
+ output_lines = re.sub(r'\\u001b\[[0-9;]*[a-zA-Z]', '', output_lines)
176
177
  for line in output_lines.split("+++"):
177
178
  if line.strip() == "":
178
179
  continue
@@ -667,7 +667,6 @@ def convert_functions_to_xml(functions_list):
667
667
 
668
668
  if __name__ == "__main__":
669
669
 
670
- # 运行本文件:python -m beswarm.aient.src.aient.utils.scripts
671
670
  os.system("clear")
672
671
  test_xml = """
673
672
  ✅ 好的,我现在读取 `README.md` 文件。
@@ -742,4 +741,6 @@ if __name__ == "__main__":
742
741
  请提供前两个 `excute_command` 的执行结果。
743
742
  """
744
743
 
745
- print(parse_function_xml(test_xml))
744
+ print(parse_function_xml(test_xml))
745
+
746
+ # 运行本文件:python -m beswarm.aient.src.aient.utils.scripts
beswarm/tools/__init__.py CHANGED
@@ -1,7 +1,6 @@
1
1
  from .think import think
2
2
  from .edit_file import edit_file
3
3
  from .worker import worker
4
- from .UIworker import UIworker
5
4
 
6
5
  from .search_arxiv import search_arxiv
7
6
  from .repomap import get_code_repo_map
@@ -44,6 +43,5 @@ __all__ = [
44
43
  "find_and_click_element",
45
44
  "scroll_screen",
46
45
  "register_tool",
47
- "UIworker",
48
46
  "search_web",
49
47
  ]
beswarm/tools/repomap.py CHANGED
@@ -1,5 +1,6 @@
1
1
  import os
2
2
  import math
3
+ import json
3
4
  import time
4
5
  import random
5
6
  import shutil
@@ -634,7 +635,23 @@ class RepoMap:
634
635
  return data
635
636
 
636
637
  def get_tags_raw(self, fname, rel_fname):
637
- lang = filename_to_lang(str(self.root / Path(fname)))
638
+ # 检查是否为 .ipynb 文件,如果是则转换为 Python 代码再处理
639
+ if fname.endswith('.ipynb'):
640
+ # 读取 ipynb 文件内容
641
+ ipynb_content = self.io.read_text(str(self.root / Path(fname)))
642
+ if not ipynb_content:
643
+ return
644
+
645
+ # 转换为 Python 代码
646
+ py_content = self.convert_ipynb_to_py_content(ipynb_content)
647
+ if not py_content:
648
+ return
649
+
650
+ # 使用 Python 语言处理转换后的内容
651
+ lang = "python"
652
+ else:
653
+ lang = filename_to_lang(str(self.root / Path(fname)))
654
+
638
655
  # print(f"lang1: {lang}")
639
656
  if not lang:
640
657
  return
@@ -653,7 +670,11 @@ class RepoMap:
653
670
  return
654
671
  query_scm = query_scm.read_text()
655
672
 
656
- code = self.io.read_text(str(self.root / Path(fname)))
673
+ # 根据文件类型选择代码内容
674
+ if fname.endswith('.ipynb'):
675
+ code = py_content
676
+ else:
677
+ code = self.io.read_text(str(self.root / Path(fname)))
657
678
  # print(f"code: {code}")
658
679
  if not code:
659
680
  return
@@ -1090,13 +1111,23 @@ class RepoMap:
1090
1111
  or self.tree_context_cache[rel_fname]["mtime"] != mtime
1091
1112
  ):
1092
1113
  # print(f"abs_fname: {abs_fname}")
1093
- code = self.io.read_text(abs_fname) or ""
1114
+ # 处理 .ipynb 文件
1115
+ if str(abs_fname).endswith('.ipynb'):
1116
+ # 读取 ipynb 文件并转换
1117
+ ipynb_content = self.io.read_text(abs_fname) or ""
1118
+ code = self.convert_ipynb_to_py_content(ipynb_content) or ""
1119
+ # 使用虚拟的 .py 文件名以便 TreeContext 能识别
1120
+ context_filename = rel_fname.replace('.ipynb', '.py')
1121
+ else:
1122
+ code = self.io.read_text(abs_fname) or ""
1123
+ context_filename = rel_fname
1124
+
1094
1125
  # print(f"code: {code}")
1095
1126
  if not code.endswith("\n"):
1096
1127
  code += "\n"
1097
1128
 
1098
1129
  context = TreeContext(
1099
- rel_fname,
1130
+ context_filename,
1100
1131
  code,
1101
1132
  color=False,
1102
1133
  line_number=False,
@@ -1161,6 +1192,73 @@ class RepoMap:
1161
1192
 
1162
1193
  return output
1163
1194
 
1195
+ def convert_ipynb_to_py_content(self, ipynb_content):
1196
+ """
1197
+ 将 .ipynb 文件内容转换为 Python 代码字符串
1198
+ Markdown cells 转换为注释
1199
+ Code cells 保持为 Python 代码
1200
+ """
1201
+ try:
1202
+ notebook_data = json.loads(ipynb_content)
1203
+ except json.JSONDecodeError:
1204
+ return None
1205
+
1206
+ py_lines = []
1207
+
1208
+ for cell in notebook_data.get('cells', []):
1209
+ cell_type = cell.get('cell_type')
1210
+ source = cell.get('source', [])
1211
+
1212
+ if not isinstance(source, list):
1213
+ source = [source]
1214
+
1215
+ source_lines = "".join(source).splitlines()
1216
+
1217
+ if cell_type == 'markdown':
1218
+ for line in source_lines:
1219
+ py_lines.append(f"# {line}")
1220
+ py_lines.append("")
1221
+ elif cell_type == 'code':
1222
+ for line in source_lines:
1223
+ if line.startswith("!") or line.startswith("%"):
1224
+ py_lines.append(f"# {line}")
1225
+ else:
1226
+ py_lines.append(line)
1227
+
1228
+ outputs = cell.get('outputs', [])
1229
+ has_output_comment = False
1230
+ for output in outputs:
1231
+ output_type = output.get('output_type')
1232
+ if output_type == 'stream':
1233
+ if not has_output_comment:
1234
+ py_lines.append("# --- Output ---")
1235
+ has_output_comment = True
1236
+ text_output = output.get('text', [])
1237
+ if isinstance(text_output, list):
1238
+ for line in "".join(text_output).splitlines():
1239
+ py_lines.append(f"# {line}")
1240
+ else:
1241
+ for line in text_output.splitlines():
1242
+ py_lines.append(f"# {line}")
1243
+ elif output_type == 'execute_result':
1244
+ data = output.get('data', {})
1245
+ if 'text/plain' in data:
1246
+ if not has_output_comment:
1247
+ py_lines.append("# --- Output ---")
1248
+ has_output_comment = True
1249
+ text_output = data['text/plain']
1250
+ if isinstance(text_output, list):
1251
+ for line in "".join(text_output).splitlines():
1252
+ py_lines.append(f"# {line}")
1253
+ else:
1254
+ for line in text_output.splitlines():
1255
+ py_lines.append(f"# {line}")
1256
+ if has_output_comment:
1257
+ py_lines.append("# --- End Output ---")
1258
+ py_lines.append("")
1259
+
1260
+ return '\n'.join(py_lines)
1261
+
1164
1262
 
1165
1263
  def find_src_files(directory):
1166
1264
  if not os.path.isdir(directory):
@@ -1287,6 +1385,7 @@ if __name__ == "__main__":
1287
1385
  # print(get_code_repo_map("."))
1288
1386
  # print(get_code_repo_map("/Users/yanyuming/Downloads/GitHub/uni-api"))
1289
1387
  # print(get_code_repo_map("/Users/yanyuming/Downloads/GitHub/text-to-motion"))
1290
- print(get_code_repo_map("/Users/yanyuming/Downloads/GitHub/beswarm/work/secretary/secretary"))
1388
+ # print(get_code_repo_map("/Users/yanyuming/Downloads/GitHub/beswarm/work/secretary/secretary"))
1389
+ print(get_code_repo_map("/Users/yanyuming/Downloads/GitHub/beswarm/work/fer/fer"))
1291
1390
 
1292
1391
  # python -m beswarm.tools.repomap
beswarm/tools/worker.py CHANGED
@@ -2,13 +2,11 @@ import os
2
2
  import copy
3
3
  import platform
4
4
  from datetime import datetime
5
- from ..aient.src.aient.plugins import register_tool
6
5
 
7
6
  from ..aient.src.aient.models import chatgpt
8
- from ..aient.src.aient.plugins import get_function_call_list
7
+ from ..aient.src.aient.plugins import register_tool, get_function_call_list
9
8
  from ..aient.src.aient.prompt import system_prompt, instruction_system_prompt
10
-
11
- from ..utils import extract_xml_content
9
+ from ..utils import extract_xml_content, get_current_screen_image_message
12
10
 
13
11
  @register_tool()
14
12
  async def worker(goal, tools, work_dir, cache_messages=None):
@@ -64,24 +62,6 @@ async def worker(goal, tools, work_dir, cache_messages=None):
64
62
  work_agent = chatgpt(**work_agent_config)
65
63
  async def instruction_agent_task():
66
64
  while True:
67
- # 获取工作agent的对话历史
68
- # conversation_history = copy.deepcopy(work_agent.conversation["default"])
69
- # conversation_history.pop(0)
70
-
71
- # conversation_len = len(conversation_history) - 1
72
- # message_index = 0
73
- # while message_index < conversation_len:
74
- # if isinstance(conversation_history[message_index]["content"], str) and conversation_history[message_index]["content"].strip() == "":
75
- # conversation_history.pop(message_index)
76
- # conversation_len = conversation_len - 1
77
- # elif isinstance(conversation_history[message_index]["content"], list) and \
78
- # len(conversation_history[message_index]["content"]) > 0 and \
79
- # conversation_history[message_index]["content"][0].get("type") == "text" and \
80
- # conversation_history[message_index]["content"][0].get("text").strip() == "":
81
- # conversation_history.pop(message_index)
82
- # conversation_len = conversation_len - 1
83
- # else:
84
- # message_index = message_index + 1
85
65
 
86
66
  instruction_prompt = f"""
87
67
  </work_agent_conversation_end>
@@ -96,6 +76,8 @@ async def worker(goal, tools, work_dir, cache_messages=None):
96
76
  conversation_history = copy.deepcopy(work_agent.conversation["default"])
97
77
  conversation_history.pop(0)
98
78
  instruction_agent.conversation["default"][1:] = conversation_history
79
+ if "find_and_click_element" in str(tools_json):
80
+ instruction_prompt = await get_current_screen_image_message(instruction_prompt)
99
81
  next_instruction = await instruction_agent.ask_async(instruction_prompt)
100
82
  print("\n🤖 指令智能体生成的下一步指令:", next_instruction)
101
83
  if "fetch_gpt_response_stream HTTP Error', 'status_code': 404" in next_instruction:
@@ -107,6 +89,13 @@ async def worker(goal, tools, work_dir, cache_messages=None):
107
89
  print("\n❌ 指令智能体生成的指令不符合要求,请重新生成。")
108
90
  continue
109
91
  else:
92
+ if conversation_history == []:
93
+ next_instruction = (
94
+ "任务描述:\n"
95
+ f"{goal}\n\n"
96
+ "现在开始执行第一步:\n"
97
+ f"{next_instruction}"
98
+ )
110
99
  break
111
100
  return next_instruction
112
101
 
@@ -120,7 +109,8 @@ async def worker(goal, tools, work_dir, cache_messages=None):
120
109
  if "任务已完成" in next_instruction:
121
110
  print("\n✅ 任务已完成!")
122
111
  break
123
-
112
+ if "find_and_click_element" in str(tools_json):
113
+ next_instruction = await get_current_screen_image_message(next_instruction)
124
114
  result = await work_agent.ask_async(next_instruction)
125
115
  if result.strip() == '' or result.strip() == '</content>\n</write_to_file>':
126
116
  print("\n❌ 工作智能体回复为空,请重新生成指令。")
beswarm/utils.py CHANGED
@@ -8,4 +8,40 @@ def extract_xml_content(text, xml_tag):
8
8
  result = match.group(1)
9
9
  if not result:
10
10
  return ''
11
- return result
11
+ return result
12
+
13
+ import io
14
+ import base64
15
+ from .aient.src.aient.core.utils import get_image_message, get_text_message
16
+
17
+ async def get_current_screen_image_message(prompt):
18
+ print("instruction agent 正在截取当前屏幕...")
19
+ try:
20
+ import pyautogui
21
+ # 使用 pyautogui 截取屏幕,返回 PIL Image 对象
22
+ screenshot = pyautogui.screenshot()
23
+ # img_width, img_height = screenshot.size # 获取截图尺寸
24
+ img_width, img_height = pyautogui.size()
25
+ print(f"截图成功,尺寸: {img_width}x{img_height}")
26
+
27
+ # 将 PIL Image 对象转换为 Base64 编码的 PNG 字符串
28
+ buffered = io.BytesIO()
29
+ screenshot.save(buffered, format="PNG")
30
+ base64_encoded_image = base64.b64encode(buffered.getvalue()).decode("utf-8")
31
+ IMAGE_MIME_TYPE = "image/png" # 截图格式为 PNG
32
+
33
+ except ImportError:
34
+ # Pillow 也是 pyautogui 的依赖,但以防万一单独处理
35
+ print("\n❌ 请安装所需库: pip install Pillow pyautogui")
36
+ return False
37
+ except Exception as e:
38
+ print(f"\n❌ 截取屏幕或处理图像时出错: {e}")
39
+ return False
40
+
41
+ engine_type = "gpt"
42
+ message_list = []
43
+ text_message = await get_text_message(prompt, engine_type)
44
+ image_message = await get_image_message(f"data:{IMAGE_MIME_TYPE};base64," + base64_encoded_image, engine_type)
45
+ message_list.append(text_message)
46
+ message_list.append(image_message)
47
+ return message_list
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: beswarm
3
- Version: 0.1.57
3
+ Version: 0.1.59
4
4
  Summary: MAS
5
5
  Requires-Python: >=3.11
6
6
  Description-Content-Type: text/markdown
@@ -1,14 +1,14 @@
1
1
  beswarm/__init__.py,sha256=HZjUOJtZR5QhMuDbq-wukQQn1VrBusNWai_ysGo-VVI,20
2
- beswarm/utils.py,sha256=AdDCcqAIIKQEMl7PfryVgeT9G5sHe7QNsZnrvmTGA8E,283
2
+ beswarm/utils.py,sha256=Z2Kuus2BLp9EHUC2ZNL9iUsb6NWnPj-MTA7SYzGyg24,1755
3
3
  beswarm/aient/main.py,sha256=SiYAIgQlLJqYusnTVEJOx1WNkSJKMImhgn5aWjfroxg,3814
4
- beswarm/aient/setup.py,sha256=6ygixQucKMW9S7_9BHl6IsJJN8lqDYcuKC1wOPsrzp8,487
4
+ beswarm/aient/setup.py,sha256=qROUPbImzYY1XRZJ-vg_r1jy-51i5axd7ROuKJKxUJ4,487
5
5
  beswarm/aient/src/aient/__init__.py,sha256=SRfF7oDVlOOAi6nGKiJIUK6B_arqYLO9iSMp-2IZZps,21
6
6
  beswarm/aient/src/aient/core/__init__.py,sha256=NxjebTlku35S4Dzr16rdSqSTWUvvwEeACe8KvHJnjPg,34
7
7
  beswarm/aient/src/aient/core/log_config.py,sha256=kz2_yJv1p-o3lUQOwA3qh-LSc3wMHv13iCQclw44W9c,274
8
8
  beswarm/aient/src/aient/core/models.py,sha256=kF-HLi1I2k_G5r153ZHuiGH8_NmpTlFMfK0_myB28YQ,7366
9
- beswarm/aient/src/aient/core/request.py,sha256=VItemXnWzqzS10W-RuLVrARki1w7MZMBZdyqyA5axw8,61943
10
- beswarm/aient/src/aient/core/response.py,sha256=BNHLazjfQT8mVg7LnPLzlX429aQM3S03pumPbOpczCI,31518
11
- beswarm/aient/src/aient/core/utils.py,sha256=n3dyaApN4rrSduI8cjZbeD0mv8_O5LPTTbwRkj1_v4w,26540
9
+ beswarm/aient/src/aient/core/request.py,sha256=9GbzEg7jIH8s-jXeB1gsfoOsDbwg4C6LqXvRxVTnqEs,65263
10
+ beswarm/aient/src/aient/core/response.py,sha256=Z0Bjl_QvpUguyky1LIcsVks4BKKqT0eYEpDmKa_cwpQ,31978
11
+ beswarm/aient/src/aient/core/utils.py,sha256=-naFCv8V-qhnqvDUd8BNbW1HR9CVAPxISrXoAz464Qg,26580
12
12
  beswarm/aient/src/aient/core/test/test_base_api.py,sha256=pWnycRJbuPSXKKU9AQjWrMAX1wiLC_014Qc9hh5C2Pw,524
13
13
  beswarm/aient/src/aient/core/test/test_geminimask.py,sha256=HFX8jDbNg_FjjgPNxfYaR-0-roUrOO-ND-FVsuxSoiw,13254
14
14
  beswarm/aient/src/aient/core/test/test_image.py,sha256=_T4peNGdXKBHHxyQNx12u-NTyFE8TlYI6NvvagsG2LE,319
@@ -25,7 +25,7 @@ beswarm/aient/src/aient/models/vertex.py,sha256=qVD5l1Q538xXUPulxG4nmDjXE1VoV4yu
25
25
  beswarm/aient/src/aient/plugins/__init__.py,sha256=p3KO6Aa3Lupos4i2SjzLQw1hzQTigOAfEHngsldrsyk,986
26
26
  beswarm/aient/src/aient/plugins/arXiv.py,sha256=yHjb6PS3GUWazpOYRMKMzghKJlxnZ5TX8z9F6UtUVow,1461
27
27
  beswarm/aient/src/aient/plugins/config.py,sha256=Vp6CG9ocdC_FAlCMEGtKj45xamir76DFxdJVvURNtog,6539
28
- beswarm/aient/src/aient/plugins/excute_command.py,sha256=A3WmfZboEikU1EHvtMWhBv-xHxCyMxbDddQ982I_8wE,10482
28
+ beswarm/aient/src/aient/plugins/excute_command.py,sha256=huQSbNbeImV8BUIsQKE13BIhCAMr7aYRyXO4saE1dTI,10534
29
29
  beswarm/aient/src/aient/plugins/get_time.py,sha256=Ih5XIW5SDAIhrZ9W4Qe5Hs1k4ieKPUc_LAd6ySNyqZk,654
30
30
  beswarm/aient/src/aient/plugins/image.py,sha256=ZElCIaZznE06TN9xW3DrSukS7U3A5_cjk1Jge4NzPxw,2072
31
31
  beswarm/aient/src/aient/plugins/list_directory.py,sha256=JZVuImecMSfEv6jLqii-0uQJ1UCsrpMNmYlwW3PEDg4,1374
@@ -39,7 +39,7 @@ beswarm/aient/src/aient/prompt/__init__.py,sha256=GBtn6-JDT8KHFCcuPpfSNE_aGddg5p
39
39
  beswarm/aient/src/aient/prompt/agent.py,sha256=y2GETN6ScC5yQVs75VFfzm4YUWzblbqLYz0Sy6JnPRw,24950
40
40
  beswarm/aient/src/aient/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
41
41
  beswarm/aient/src/aient/utils/prompt.py,sha256=UcSzKkFE4-h_1b6NofI6xgk3GoleqALRKY8VBaXLjmI,11311
42
- beswarm/aient/src/aient/utils/scripts.py,sha256=wutPtgbs-WXo5AACLpnCJaRQBOSKXWNnsf2grbYDzyQ,29098
42
+ beswarm/aient/src/aient/utils/scripts.py,sha256=LD8adnfuRrJoY2tWKseXOPJXaxbrUmz4czsnUvHswNY,29096
43
43
  beswarm/aient/test/chatgpt.py,sha256=Hvl7FuDt1c74N5TVBmhErOPvJbJJzA7FNp5VoZM4u30,4957
44
44
  beswarm/aient/test/claude.py,sha256=IyB4qI1eJLwlSfDNSnt2FhbQWYyBighHUjJxEXc3osQ,1095
45
45
  beswarm/aient/test/test.py,sha256=rldnoLQdtRR8IKFSIzTti7eIK2MpPMoi9gL5qD8_K44,29
@@ -119,17 +119,16 @@ beswarm/queries/tree-sitter-languages/ruby-tags.scm,sha256=vIidsCeE2A0vdFN18yXKq
119
119
  beswarm/queries/tree-sitter-languages/rust-tags.scm,sha256=9ljM1nzhfPs_ZTRw7cr2P9ToOyhGcKkCoN4_HPXSWi4,1451
120
120
  beswarm/queries/tree-sitter-languages/scala-tags.scm,sha256=UxQjz80JIrrJ7Pm56uUnQyThfmQNvwk7aQzPNypB-Ao,1761
121
121
  beswarm/queries/tree-sitter-languages/typescript-tags.scm,sha256=OMdCeedPiA24ky82DpgTMKXK_l2ySTuF2zrQ2fJAi9E,1253
122
- beswarm/tools/UIworker.py,sha256=1sEC76VGFwo48lSx6KOvhJwhgBj7UWAHAAH9BG_lp-M,6439
123
- beswarm/tools/__init__.py,sha256=jOfYY4EYkwmz-FTJGrI1CyaIYkGWsmGzZBGsoupeX9M,1088
122
+ beswarm/tools/__init__.py,sha256=oDsCE7Coy3TXM0pTRS_4mWTEyPnsKVK7Vco1idSVxJk,1041
124
123
  beswarm/tools/click.py,sha256=TygaekCXTmU3fIu6Uom7ZcyzEgYMlCC_GX-5SmWHuLI,20762
125
124
  beswarm/tools/edit_file.py,sha256=xlAD0HB_xM0yZYc0eJwLE-9mAkywXa2UQPNHzG1OaW4,7664
126
125
  beswarm/tools/planner.py,sha256=lguBCS6kpwNPoXQvqH-WySabVubT82iyWOkJnjt6dXw,1265
127
- beswarm/tools/repomap.py,sha256=CwvwoN5Swr42EzrORTTeV8MMb7mPviy4a4b0fxBu50k,40828
126
+ beswarm/tools/repomap.py,sha256=N09K0UgwjCN7Zjg_5TYlVsulp3n2fztYlS8twalChU8,45003
128
127
  beswarm/tools/search_arxiv.py,sha256=9slwBemXjEqrd7-YgVmyMijPXlkhZCybEDRVhWVQ9B0,7937
129
128
  beswarm/tools/search_web.py,sha256=B24amOnGHnmdV_6S8bw8O2PdhZRRIDtJjg-wXcfP7dQ,11859
130
129
  beswarm/tools/think.py,sha256=WLw-7jNIsnS6n8MMSYUin_f-BGLENFmnKM2LISEp0co,1760
131
- beswarm/tools/worker.py,sha256=b-FvSEP27-zMYNcqaQeVBoWxaSf2cX_7_1p1GAF6h04,6191
132
- beswarm-0.1.57.dist-info/METADATA,sha256=FdjwXDGhJw1_43unl9YjSFbzl8jB5Pu-g0KX_Hn_TNA,3553
133
- beswarm-0.1.57.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
134
- beswarm-0.1.57.dist-info/top_level.txt,sha256=pJw4O87wvt5882smuSO6DfByJz7FJ8SxxT8h9fHCmpo,8
135
- beswarm-0.1.57.dist-info/RECORD,,
130
+ beswarm/tools/worker.py,sha256=PRsAa856KYNPbi2L8rDdWEKx3u-c_WzxoI1x8AFkmtc,5613
131
+ beswarm-0.1.59.dist-info/METADATA,sha256=LrUP7SrPGRG87cgSLMuE7liHKGGMIe4bQUdOhbfRifk,3553
132
+ beswarm-0.1.59.dist-info/WHEEL,sha256=zaaOINJESkSfm_4HQVc5ssNzHCPXhJm0kEUakpsEHaU,91
133
+ beswarm-0.1.59.dist-info/top_level.txt,sha256=pJw4O87wvt5882smuSO6DfByJz7FJ8SxxT8h9fHCmpo,8
134
+ beswarm-0.1.59.dist-info/RECORD,,
beswarm/tools/UIworker.py DELETED
@@ -1,147 +0,0 @@
1
- import os
2
- import io
3
- import copy
4
- import base64
5
- import platform
6
- from datetime import datetime
7
- from ..aient.src.aient.plugins import register_tool, get_function_call_list
8
-
9
- from ..aient.src.aient.models import chatgpt
10
- from ..aient.src.aient.prompt import system_prompt, instruction_system_prompt
11
- from ..aient.src.aient.core.utils import get_image_message, get_text_message
12
-
13
- from ..utils import extract_xml_content
14
-
15
- async def get_current_screen_image_message(prompt):
16
- print("instruction agent 正在截取当前屏幕...")
17
- try:
18
- import pyautogui
19
- # 使用 pyautogui 截取屏幕,返回 PIL Image 对象
20
- screenshot = pyautogui.screenshot()
21
- # img_width, img_height = screenshot.size # 获取截图尺寸
22
- img_width, img_height = pyautogui.size()
23
- print(f"截图成功,尺寸: {img_width}x{img_height}")
24
-
25
- # 将 PIL Image 对象转换为 Base64 编码的 PNG 字符串
26
- buffered = io.BytesIO()
27
- screenshot.save(buffered, format="PNG")
28
- base64_encoded_image = base64.b64encode(buffered.getvalue()).decode("utf-8")
29
- IMAGE_MIME_TYPE = "image/png" # 截图格式为 PNG
30
-
31
- except ImportError:
32
- # Pillow 也是 pyautogui 的依赖,但以防万一单独处理
33
- print("\n❌ 请安装所需库: pip install Pillow pyautogui")
34
- return False
35
- except Exception as e:
36
- print(f"\n❌ 截取屏幕或处理图像时出错: {e}")
37
- return False
38
-
39
- engine_type = "gpt"
40
- message_list = []
41
- text_message = await get_text_message(prompt, engine_type)
42
- image_message = await get_image_message(f"data:{IMAGE_MIME_TYPE};base64," + base64_encoded_image, engine_type)
43
- message_list.append(text_message)
44
- message_list.append(image_message)
45
- return message_list
46
-
47
- @register_tool()
48
- async def UIworker(goal, tools, work_dir, cache_messages=None):
49
- """
50
- 启动一个 **工作智能体 (Worker Agent)** 来自动完成指定的任务目标 (`goal`)。
51
-
52
- 这个工作智能体接收一个清晰的任务描述、一组可供调用的工具 (`tools`),以及一个工作目录 (`work_dir`)。
53
- 它会利用语言模型的能力,结合可用的工具,自主规划并逐步执行必要的操作,直到最终完成指定的任务目标。
54
- 核心功能是根据输入的目标,驱动整个任务执行流程。
55
-
56
- Args:
57
- goal (str): 需要完成的具体任务目标描述。工作智能体将围绕此目标进行工作。必须清晰、具体。
58
- tools (list[str]): 一个包含可用工具函数对象的列表。工作智能体在执行任务时可能会调用这些工具来与环境交互(例如读写文件、执行命令等)。
59
- work_dir (str): 工作目录的绝对路径。工作智能体将在此目录上下文中执行操作。
60
-
61
- Returns:
62
- str: 当任务成功完成时,返回字符串 "任务已完成"。
63
- """
64
-
65
- tools_json = [value for _, value in get_function_call_list(tools).items()]
66
- work_agent_system_prompt = system_prompt.format(
67
- os_version=platform.platform(),
68
- workspace_path=work_dir,
69
- shell=os.getenv('SHELL', 'Unknown'),
70
- tools_list=tools_json
71
- )
72
-
73
- work_agent_config = {
74
- "api_key": os.getenv("API_KEY"),
75
- "api_url": os.getenv("BASE_URL"),
76
- "engine": os.getenv("MODEL"),
77
- "system_prompt": work_agent_system_prompt,
78
- "print_log": True,
79
- # "max_tokens": 8000,
80
- "temperature": 0.5,
81
- "function_call_max_loop": 100,
82
- }
83
- if cache_messages:
84
- work_agent_config["cache_messages"] = cache_messages
85
-
86
- instruction_agent_config = {
87
- "api_key": os.getenv("API_KEY"),
88
- "api_url": os.getenv("BASE_URL"),
89
- "engine": os.getenv("MODEL"),
90
- "system_prompt": instruction_system_prompt.format(os_version=platform.platform(), tools_list=tools_json, workspace_path=work_dir, current_time=datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
91
- "print_log": False,
92
- # "max_tokens": 4000,
93
- "temperature": 0.7,
94
- "use_plugins": False,
95
- }
96
-
97
- # 工作agent初始化
98
- work_agent = chatgpt(**work_agent_config)
99
- async def instruction_agent_task():
100
- while True:
101
- instruction_prompt = f"""
102
- 任务目标: {goal}
103
-
104
- 以上对话都是工作智能体的对话历史。
105
-
106
- 根据以上对话历史和目标,请生成下一步指令。如果任务已完成,请回复"任务已完成"。
107
- """
108
- # 让指令agent分析对话历史并生成新指令
109
- instruction_agent = chatgpt(**instruction_agent_config)
110
- conversation_history = copy.deepcopy(work_agent.conversation["default"])
111
- conversation_history.pop(0)
112
- instruction_agent.conversation["default"][1:] = conversation_history
113
- new_prompt = await get_current_screen_image_message(instruction_prompt)
114
- next_instruction = await instruction_agent.ask_async(new_prompt)
115
- print("\n🤖 指令智能体生成的下一步指令:", next_instruction)
116
- if "fetch_gpt_response_stream HTTP Error', 'status_code': 404" in next_instruction:
117
- raise Exception(f"Model: {instruction_agent_config['engine']} not found!")
118
- if "'status_code': 413" in next_instruction:
119
- raise Exception(f"The request body is too long, please try again.")
120
- next_instruction = extract_xml_content(next_instruction, "instructions")
121
- if not next_instruction:
122
- print("\n❌ 指令智能体生成的指令不符合要求,请重新生成。")
123
- continue
124
- else:
125
- break
126
- return next_instruction
127
-
128
- need_instruction = True
129
- while True:
130
- next_instruction = ''
131
- if need_instruction:
132
- next_instruction = await instruction_agent_task()
133
-
134
- # 检查任务是否完成
135
- if "任务已完成" in next_instruction:
136
- print("\n✅ 任务已完成!")
137
- break
138
- new_prompt = await get_current_screen_image_message(next_instruction)
139
- result = await work_agent.ask_async(new_prompt)
140
- if result.strip() == '':
141
- print("\n❌ 工作智能体回复为空,请重新生成指令。")
142
- need_instruction = False
143
- continue
144
- print("✅ 工作智能体回复:", result)
145
- need_instruction = True
146
-
147
- return "任务已完成"