aient 1.2.39__tar.gz → 1.2.41__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {aient-1.2.39 → aient-1.2.41}/PKG-INFO +1 -1
- {aient-1.2.39 → aient-1.2.41}/aient/architext/architext/core.py +37 -1
- {aient-1.2.39 → aient-1.2.41}/aient/architext/test/test.py +74 -1
- {aient-1.2.39 → aient-1.2.41}/aient/models/chatgpt.py +27 -4
- {aient-1.2.39 → aient-1.2.41}/aient/utils/scripts.py +55 -0
- {aient-1.2.39 → aient-1.2.41}/aient.egg-info/PKG-INFO +1 -1
- {aient-1.2.39 → aient-1.2.41}/aient.egg-info/SOURCES.txt +0 -2
- {aient-1.2.39 → aient-1.2.41}/pyproject.toml +1 -1
- aient-1.2.39/aient/plugins/read_file.py +0 -198
- aient-1.2.39/aient/plugins/write_file.py +0 -90
- {aient-1.2.39 → aient-1.2.41}/LICENSE +0 -0
- {aient-1.2.39 → aient-1.2.41}/README.md +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient/__init__.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient/architext/architext/__init__.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient/architext/test/openai_client.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient/architext/test/test_save_load.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient/core/__init__.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient/core/log_config.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient/core/models.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient/core/request.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient/core/response.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient/core/test/test_base_api.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient/core/test/test_geminimask.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient/core/test/test_image.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient/core/test/test_payload.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient/core/utils.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient/models/__init__.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient/models/audio.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient/models/base.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient/plugins/__init__.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient/plugins/arXiv.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient/plugins/config.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient/plugins/excute_command.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient/plugins/get_time.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient/plugins/image.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient/plugins/list_directory.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient/plugins/read_image.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient/plugins/readonly.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient/plugins/registry.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient/plugins/run_python.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient/plugins/websearch.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient/utils/__init__.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient/utils/prompt.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient.egg-info/dependency_links.txt +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient.egg-info/requires.txt +0 -0
- {aient-1.2.39 → aient-1.2.41}/aient.egg-info/top_level.txt +0 -0
- {aient-1.2.39 → aient-1.2.41}/setup.cfg +0 -0
- {aient-1.2.39 → aient-1.2.41}/test/test_Web_crawler.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/test/test_ddg_search.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/test/test_google_search.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/test/test_ollama.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/test/test_plugin.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/test/test_url.py +0 -0
- {aient-1.2.39 → aient-1.2.41}/test/test_whisper.py +0 -0
@@ -192,6 +192,22 @@ class Texts(ContextProvider):
|
|
192
192
|
# For static content, compare the actual content.
|
193
193
|
return self.content == other.content
|
194
194
|
|
195
|
+
def __iadd__(self, other):
    """Append a string to this provider's text in place (``provider += "..."``).

    Args:
        other: Text to append. Only ``str`` operands are supported.

    Returns:
        ``self``, after the combined text has been handed to ``self.update``;
        ``NotImplemented`` for any non-string operand so that Python can try
        the reflected operation or raise ``TypeError`` itself.
    """
    if not isinstance(other, str):
        return NotImplemented
    # A provider whose content is still unset (None) is treated as empty text
    # rather than failing with "unsupported operand type(s) for +".
    self.update((self.content or "") + other)
    return self
|
201
|
+
|
202
|
+
def __add__(self, other):
    """Combine this provider with a string or with a message (``provider + x``).

    Args:
        other: Either a ``str`` to append to this provider's text, or a
            ``Message`` whose providers should come after this one.

    Returns:
        * ``str`` operand: a new instance of this provider's own class built
          with the keyword arguments ``text``, ``name``, ``visible`` and
          ``newline``; ``self`` is left untouched. Subclasses therefore have
          to accept those four keyword arguments in their constructor.
        * ``Message`` operand: a new message of ``other``'s class whose items
          are this provider followed by the result of ``other.provider()``.
        * anything else: ``NotImplemented``.
    """
    if isinstance(other, str):
        # Unset content (None) is treated as empty text, matching ``+=``.
        combined = (self.content or "") + other
        return type(self)(
            text=combined,
            name=self.name,
            visible=self.visible,
            newline=self.newline,
        )
    if isinstance(other, Message):
        return type(other)(*([self] + other.provider()))
    return NotImplemented
|
210
|
+
|
195
211
|
class Tools(ContextProvider):
|
196
212
|
def __init__(self, tools_json: Optional[List[Dict]] = None, name: str = "tools", visible: bool = True):
|
197
213
|
super().__init__(name, visible=visible)
|
@@ -711,7 +727,27 @@ class Messages:
|
|
711
727
|
|
712
728
|
def render(self) -> List[Dict[str, Any]]:
    """Render all messages to dicts, merging adjacent same-role text messages.

    Every message in ``self._messages`` is converted with ``to_dict()``.
    Falsy results (e.g. an empty dict) are dropped. A remaining message is
    then folded into the one before it when both have the same ``role``,
    both have a plain ``str`` ``content`` and neither carries tool metadata.
    Because empty messages are dropped first, two same-role messages that
    were only separated by an empty one are merged as well.

    Returns:
        The list of rendered message dicts, or ``[]`` when nothing renders.
        The dicts are shallow copies, so merging never modifies an object
        handed out by ``to_dict()``.
    """
    def _is_plain_text(message):
        # 'tool_calls' marks a message that requests tool invocations.
        # 'tool_call_id' is checked defensively: presumably tool results are
        # rendered with that key, and fusing two of them would drop one id.
        return (
            'tool_calls' not in message
            and 'tool_call_id' not in message
            and isinstance(message.get('content'), str)
        )

    rendered = [msg.to_dict() for msg in self._messages]

    merged: List[Dict[str, Any]] = []
    for current in rendered:
        if not current:
            continue
        previous = merged[-1] if merged else None
        if (
            previous is not None
            and current.get('role') == previous.get('role')
            and _is_plain_text(previous)
            and _is_plain_text(current)
        ):
            # ``previous`` is always our own copy (see below), so this
            # concatenation cannot leak into the source message.
            previous['content'] += current['content']
        else:
            # Copy before storing: a later merge rebinds 'content' on this
            # dict, and ``to_dict()`` may return an object it still owns.
            merged.append(dict(current))
    return merged
|
715
751
|
|
716
752
|
async def render_latest(self) -> List[Dict[str, Any]]:
|
717
753
|
await self.refresh()
|
@@ -9,7 +9,7 @@ sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), '..')
|
|
9
9
|
|
10
10
|
|
11
11
|
from architext import *
|
12
|
-
|
12
|
+
from typing import Optional, Union, Callable
|
13
13
|
# ==============================================================================
|
14
14
|
# 单元测试部分
|
15
15
|
# ==============================================================================
|
@@ -1709,6 +1709,79 @@ Files: {Files(visible=True, name="files")}
|
|
1709
1709
|
rendered_tool_2 = await tool_results_msg.render()
|
1710
1710
|
self.assertEqual(rendered_tool_1, rendered_tool_2)
|
1711
1711
|
|
1712
|
+
async def test_zag_iadd_on_provider(self):
    """Text appended to a provider with ``+=`` must appear in the rendered output."""
    class Goal(Texts):
        def __init__(self, text: Optional[Union[str, Callable[[], str]]] = None, name: str = "goal"):
            super().__init__(text=text, name=name)

        async def render(self) -> Optional[str]:
            # Wrap whatever the base class renders in a <goal> tag.
            inner = await super().render()
            return None if inner is None else f"<goal>{inner}</goal>"

    conversation = Messages(UserMessage(Goal("hi")))

    # Syntax under test: look the provider up by name, then extend it in place.
    provider = conversation.provider("goal")
    provider += "test"

    output = await conversation.render_latest()

    self.assertEqual(len(output), 1)
    self.assertEqual(output[0]['content'], "<goal>hitest</goal>")
|
1734
|
+
|
1735
|
+
async def test_zz_user_message_auto_merging(self):
    """Consecutive user-role messages must be merged automatically."""
    # Provider used by scenario 4; defining it up front has no side effects.
    class Goal(Texts):
        def __init__(self, text: Optional[Union[str, Callable[[], str]]] = None, name: str = "goal", visible: bool = True, newline: bool = False):
            super().__init__(text=text, name=name, visible=visible, newline=newline)

        async def render(self) -> Optional[str]:
            inner = await super().render()
            return None if inner is None else f"<goal>{inner}</goal>"

    # Scenario 1: merging at construction time.
    at_init = Messages(UserMessage("hi"), UserMessage("hi2"))
    self.assertEqual(len(at_init), 1, "初始化时,两个连续的UserMessage应该合并为一个")
    self.assertEqual(len(at_init[0]), 2, "合并后的UserMessage应该包含两个Texts provider")

    out_init = await at_init.render_latest()
    self.assertEqual(out_init[0]['content'], "hihi2", "合并后渲染的内容不正确")

    # Scenario 2: merging on append.
    growing = Messages(UserMessage("hi"))
    growing.append(UserMessage("hi2"))
    self.assertEqual(len(growing), 1, "追加时,两个连续的UserMessage应该合并为一个")
    self.assertEqual(len(growing[0]), 2, "追加合并后的UserMessage应该包含两个Texts provider")

    out_append = await growing.render_latest()
    self.assertEqual(out_append[0]['content'], "hihi2", "追加合并后渲染的内容不正确")

    # Scenario 3: appending a RoleMessage with the same role also merges.
    growing.append(RoleMessage("user", "hi3"))
    self.assertEqual(len(growing), 1, "追加RoleMessage时,连续的UserMessage应该合并为一个")
    self.assertEqual(len(growing[0]), 3, "追加RoleMessage合并后的UserMessage应该包含三个Texts provider")

    out_role = await growing.render_latest()
    self.assertEqual(out_role[0]['content'], "hihi2hi3", "追加RoleMessage合并后渲染的内容不正确")

    # Scenario 4: appending a RoleMessage built from (ContextProvider + str).
    growing.append(RoleMessage("user", Goal("goal") + "hi4"))
    self.assertEqual(len(growing), 1, "追加(ContextProvider + str)的RoleMessage时,未能正确合并")
    self.assertEqual(len(growing[0]), 4, "追加(ContextProvider + str)的RoleMessage合并后的provider数量不正确")

    out_combo = await growing.render_latest()
    self.assertEqual(out_combo[0]['content'], "hihi2hi3<goal>goalhi4</goal>", "追加(ContextProvider + str)合并后渲染的内容不正确")

    # Scenario 5: same-role messages separated by an empty message merge at render time.
    separated = Messages(UserMessage("hi"), AssistantMessage(""), UserMessage("hi2"))
    out_separated = await separated.render_latest()
    self.assertEqual(len(out_separated), 1, "被空消息隔开的同角色消息在渲染时应该合并")
    self.assertEqual(out_separated[0]['content'], "hihi2", "被空消息隔开的同角色消息合并后内容不正确")
|
1712
1785
|
|
1713
1786
|
# ==============================================================================
|
1714
1787
|
# 6. 演示
|
@@ -12,7 +12,7 @@ from typing import Union, Optional, Callable
|
|
12
12
|
from .base import BaseLLM
|
13
13
|
from ..plugins.registry import registry
|
14
14
|
from ..plugins import PLUGINS, get_tools_result_async, function_call_list, update_tools_config
|
15
|
-
from ..utils.scripts import safe_get, async_generator_to_sync, parse_function_xml, parse_continuous_json, convert_functions_to_xml, remove_xml_tags_and_content
|
15
|
+
from ..utils.scripts import safe_get, async_generator_to_sync, parse_function_xml, parse_continuous_json, convert_functions_to_xml, remove_xml_tags_and_content, find_most_frequent_phrase
|
16
16
|
from ..core.request import prepare_request_payload
|
17
17
|
from ..core.response import fetch_response_stream, fetch_response
|
18
18
|
from ..architext.architext import Messages, SystemMessage, UserMessage, AssistantMessage, ToolCalls, ToolResults, Texts, RoleMessage, Images, Files
|
@@ -81,6 +81,14 @@ class TaskComplete(Exception):
|
|
81
81
|
super().__init__(f"Task completed with message: {message}")
|
82
82
|
|
83
83
|
|
84
|
+
class RepetitiveResponseError(Exception):
    """Custom exception for detecting repetitive and meaningless generated strings.

    Attributes:
        phrase: The phrase that was found to repeat.
        count: How many times ``phrase`` occurred.
    """

    def __init__(self, message, phrase, count):
        super().__init__(message)
        self.phrase = phrase
        self.count = count

    def __reduce__(self):
        # Exceptions are rebuilt as ``cls(*self.args)`` by pickle and copy.
        # ``args`` only holds the message, so without this override the
        # rebuild fails with a missing-argument TypeError as soon as the
        # error is copied or sent across a process boundary.
        message = self.args[0] if self.args else ""
        return (type(self), (message, self.phrase, self.count), dict(self.__dict__))
|
90
|
+
|
91
|
+
|
84
92
|
class chatgpt(BaseLLM):
|
85
93
|
"""
|
86
94
|
Official ChatGPT API
|
@@ -172,7 +180,7 @@ class chatgpt(BaseLLM):
|
|
172
180
|
"""
|
173
181
|
Add a message to the conversation
|
174
182
|
"""
|
175
|
-
# self.logger.info(f"role: {role}, function_name: {function_name}, message: {message}")
|
183
|
+
# self.logger.info(f"role: {role}, function_name: {function_name}, message: {message}, function_arguments: {function_arguments}")
|
176
184
|
if convo_id not in self.conversation:
|
177
185
|
self.reset(convo_id=convo_id)
|
178
186
|
if function_name == "" and message:
|
@@ -276,9 +284,9 @@ class chatgpt(BaseLLM):
|
|
276
284
|
}
|
277
285
|
|
278
286
|
done_message = self.conversation[convo_id].provider("done")
|
279
|
-
if
|
287
|
+
if done_message:
|
280
288
|
done_message.visible = False
|
281
|
-
if self.conversation[convo_id][-1][-1].name == "done":
|
289
|
+
if self.check_done and self.conversation[convo_id][-1][-1].name == "done":
|
282
290
|
self.conversation[convo_id][-1][-1].visible = True
|
283
291
|
|
284
292
|
# 构造请求数据
|
@@ -438,6 +446,13 @@ class chatgpt(BaseLLM):
|
|
438
446
|
|
439
447
|
if not full_response.strip() and not need_function_call:
|
440
448
|
raise EmptyResponseError("Response is empty")
|
449
|
+
most_frequent_phrase, most_frequent_phrase_count = find_most_frequent_phrase(full_response)
|
450
|
+
if most_frequent_phrase_count > 100:
|
451
|
+
raise RepetitiveResponseError(
|
452
|
+
f"Detected repetitive and meaningless content. The phrase '{most_frequent_phrase}' appeared {most_frequent_phrase_count} times.",
|
453
|
+
most_frequent_phrase,
|
454
|
+
most_frequent_phrase_count
|
455
|
+
)
|
441
456
|
|
442
457
|
if self.print_log:
|
443
458
|
self.logger.info(f"total_tokens: {total_tokens}")
|
@@ -612,6 +627,11 @@ class chatgpt(BaseLLM):
|
|
612
627
|
elif tool_name == "get_knowledge_graph_tree":
|
613
628
|
self.conversation[convo_id].provider("knowledge_graph").visible = True
|
614
629
|
final_tool_response = "Get knowledge graph tree successfully! The knowledge graph tree has been updated in the tag <knowledge_graph_tree>."
|
630
|
+
elif tool_name.endswith("goal"):
|
631
|
+
goal_provider = self.conversation[convo_id].provider("goal")
|
632
|
+
if goal_provider:
|
633
|
+
goal_provider += tool_response
|
634
|
+
final_tool_response = "Get goal successfully! The goal has been updated in the tag <goal>."
|
615
635
|
elif tool_name == "write_to_file":
|
616
636
|
tool_args = None
|
617
637
|
elif tool_name == "read_image":
|
@@ -800,6 +820,9 @@ class chatgpt(BaseLLM):
|
|
800
820
|
except EmptyResponseError as e:
|
801
821
|
self.logger.warning(f"{e}, retrying...")
|
802
822
|
continue
|
823
|
+
except RepetitiveResponseError as e:
|
824
|
+
self.logger.warning(f"{e}, retrying...")
|
825
|
+
continue
|
803
826
|
except TaskComplete as e:
|
804
827
|
raise
|
805
828
|
except ModelNotFoundError as e:
|
@@ -3,10 +3,65 @@ import re
|
|
3
3
|
import json
|
4
4
|
import fnmatch
|
5
5
|
import requests
|
6
|
+
import collections
|
6
7
|
import urllib.parse
|
7
8
|
|
8
9
|
from ..core.utils import get_image_message
|
9
10
|
|
11
|
+
def find_most_frequent_phrase(s, min_len=4, max_phrase_len=20):
    """Find the most frequently repeated phrase (word sequence) in a string.

    The text is split on whitespace and every run of 1..k consecutive words
    is counted, where ``k = min(word_count // 2, max_phrase_len)``.
    Occurrences may overlap.

    Args:
        s: Input string.
        min_len: Minimum length of a phrase in characters, measured on the
            words joined by single spaces.
        max_phrase_len: Maximum phrase length to search for, in words.

    Returns:
        A tuple ``(most_frequent_phrase, count)``. Only phrases that occur
        more than once and are at least ``min_len`` characters long are
        considered. The highest count wins; ties go to the phrase with more
        characters, then to the one that was counted first. Returns
        ``("", 0)`` when no phrase qualifies.
    """
    if not s or len(s) < min_len:
        return "", 0

    words = s.split()
    word_total = len(words)
    longest = min(word_total // 2, max_phrase_len)

    # Count every n-gram directly on the word list. Counter keeps
    # first-insertion order, which the tie-break below relies on.
    counts = collections.Counter(
        tuple(words[start:start + size])
        for size in range(1, longest + 1)
        for start in range(word_total - size + 1)
    )

    repeated = ((" ".join(gram), seen) for gram, seen in counts.items() if seen > 1)
    eligible = [(phrase, seen) for phrase, seen in repeated if len(phrase) >= min_len]
    if not eligible:
        return "", 0

    # max() returns the first maximal element, so earlier phrases win exact ties.
    return max(eligible, key=lambda item: (item[1], len(item[0])))
|
64
|
+
|
10
65
|
def get_doc_from_url(url):
|
11
66
|
filename = urllib.parse.unquote(url.split("/")[-1])
|
12
67
|
response = requests.get(url, stream=True)
|
@@ -33,13 +33,11 @@ aient/plugins/excute_command.py
|
|
33
33
|
aient/plugins/get_time.py
|
34
34
|
aient/plugins/image.py
|
35
35
|
aient/plugins/list_directory.py
|
36
|
-
aient/plugins/read_file.py
|
37
36
|
aient/plugins/read_image.py
|
38
37
|
aient/plugins/readonly.py
|
39
38
|
aient/plugins/registry.py
|
40
39
|
aient/plugins/run_python.py
|
41
40
|
aient/plugins/websearch.py
|
42
|
-
aient/plugins/write_file.py
|
43
41
|
aient/utils/__init__.py
|
44
42
|
aient/utils/prompt.py
|
45
43
|
aient/utils/scripts.py
|
@@ -1,198 +0,0 @@
|
|
1
|
-
import os
|
2
|
-
import json
|
3
|
-
import chardet
|
4
|
-
from pdfminer.high_level import extract_text
|
5
|
-
|
6
|
-
from .registry import register_tool
|
7
|
-
|
8
|
-
# Read the contents of a file.
# NOTE(review): the docstring below is reproduced verbatim (including its
# non-English parts) because it is presumably consumed by `register_tool` as
# the tool description shown to the model — confirm before rewording it.
# NOTE(review): the docstring mentions DOCX extraction, but no DOCX branch is
# visible in this function; only .pdf and .ipynb get special handling.
@register_tool()
def read_file(file_path, head: int = None):
    """
    Description: Request to read the contents of a file at the specified path. Use this when you need to examine the contents of an existing file you do not know the contents of, for example to analyze code, review text files, or extract information from configuration files. Automatically extracts raw text from PDF and DOCX files. May not be suitable for other types of binary files, as it returns the raw content as a string.

    注意:
    1. pdf 文件 必须使用 read_file 读取,可以使用 read_file 直接读取 PDF。

    参数:
    file_path: 要读取的文件路径,(required) The path of the file to read (relative to the current working directory)
    head: (可选) 读取文件的前N行,默认为None,读取整个文件

    返回:
    文件内容的字符串

    Usage:
    <read_file>
    <file_path>File path here</file_path>
    </read_file>

    Examples:

    1. Reading an entire file:
    <read_file>
    <file_path>frontend.pdf</file_path>
    </read_file>

    2. Reading multiple files:

    <read_file>
    <file_path>frontend-config.json</file_path>
    </read_file>

    <read_file>
    <file_path>backend-config.txt</file_path>
    </read_file>

    ...

    <read_file>
    <file_path>README.md</file_path>
    </read_file>
    """
    try:
        # Make sure the path exists.
        if not os.path.exists(file_path):
            return f"<tool_error>文件 '{file_path}' 不存在</tool_error>"

        # Make sure the path is a file.
        if not os.path.isfile(file_path):
            return f"<tool_error>'{file_path}' 不是一个文件</tool_error>"

        # Branch on the file extension.
        if file_path.lower().endswith('.pdf'):
            # Extract the text of the PDF.
            text_content = extract_text(file_path)

            # The extraction produced nothing.
            if not text_content:
                return f"<tool_error>无法从 '{file_path}' 提取文本内容</tool_error>"
        elif file_path.lower().endswith('.ipynb'):
            try:
                with open(file_path, 'r', encoding='utf-8') as file:
                    notebook_content = json.load(file)

                # Strip bulky outputs from code cells before serialising the
                # notebook back to text.
                for cell in notebook_content.get('cells', []):
                    if cell.get('cell_type') == 'code' and 'outputs' in cell:
                        filtered_outputs = []
                        for output in cell.get('outputs', []):
                            new_output = output.copy()
                            if 'data' in new_output:
                                original_data = new_output['data']
                                filtered_data = {}
                                for key, value in original_data.items():
                                    # Drop embedded images.
                                    if key.startswith('image/'):
                                        continue
                                    # Drop HTML that contains a "show_videos" table.
                                    if key == 'text/html':
                                        html_content = "".join(value) if isinstance(value, list) else value
                                        if isinstance(html_content, str) and '<table class="show_videos"' in html_content:
                                            continue
                                    filtered_data[key] = value
                                if filtered_data:
                                    new_output['data'] = filtered_data
                                    filtered_outputs.append(new_output)
                            elif 'output_type' in new_output and new_output['output_type'] in ['stream', 'error']:
                                filtered_outputs.append(new_output)

                        cell['outputs'] = filtered_outputs

                text_content = json.dumps(notebook_content, indent=2, ensure_ascii=False)
            except json.JSONDecodeError:
                return f"<tool_error>文件 '{file_path}' 不是有效的JSON格式 (IPython Notebook)。</tool_error>"
            except Exception as e:
                return f"<tool_error>处理IPython Notebook文件 '{file_path}' 时发生错误: {e}</tool_error>"
        else:
            # Update: the generic file-reading logic was changed to support multiple encodings.
            # This part replaced the previous contents of the else block.
            try:
                with open(file_path, 'rb') as file: # read in binary mode
                    raw_data = file.read()

                if not raw_data: # handle an empty file
                    text_content = ""
                else:
                    detected_info = chardet.detect(raw_data)
                    primary_encoding_to_try = detected_info['encoding']
                    confidence = detected_info['confidence']

                    decoded_successfully = False

                    # Attempt 1: use the detected encoding (if confidence is high and the encoding is valid).
                    if primary_encoding_to_try and confidence > 0.7: # the confidence threshold can be adjusted as needed
                        try:
                            text_content = raw_data.decode(primary_encoding_to_try)
                            decoded_successfully = True
                        except (UnicodeDecodeError, LookupError): # LookupError covers invalid encoding names
                            # Decoding failed; the fallback encodings are tried next.
                            pass

                    # Attempt 2: UTF-8 (if the first attempt failed or was skipped).
                    if not decoded_successfully:
                        try:
                            text_content = raw_data.decode('utf-8')
                            decoded_successfully = True
                        except UnicodeDecodeError:
                            # Decoding failed; the next fallback encoding is tried.
                            pass

                    # Attempt 3: UTF-16 (if all previous attempts failed).
                    # 'utf-16' handles LE/BE data with a BOM. Without a BOM, native byte order is assumed.
                    # chardet can usually detect the specific utf-16le or utf-16be more accurately.
                    if not decoded_successfully:
                        try:
                            text_content = raw_data.decode('utf-16')
                            decoded_successfully = True
                        except UnicodeDecodeError:
                            # All main attempts failed.
                            pass

                    if not decoded_successfully:
                        # Error message for when every attempt has failed.
                        detected_str_part = ""
                        if primary_encoding_to_try and confidence > 0.7: # a high-confidence detection result exists
                            detected_str_part = f"检测到的编码 '{primary_encoding_to_try}' (置信度 {confidence:.2f}), "
                        elif primary_encoding_to_try: # a detection result exists but its confidence is low
                            detected_str_part = f"低置信度检测编码 '{primary_encoding_to_try}' (置信度 {confidence:.2f}), "

                        return f"<tool_error>文件 '{file_path}' 无法解码。已尝试: {detected_str_part}UTF-8, UTF-16。</tool_error>"

            except FileNotFoundError:
                # Unlikely to trigger here, because os.path.exists is already checked at the start of the function.
                return f"<tool_error>文件 '{file_path}' 在读取过程中未找到。</tool_error>"
            except Exception as e:
                # Catch other errors that can occur in this block, e.g. read problems not caught by the earlier checks.
                return f"<tool_error>处理通用文件 '{file_path}' 时发生错误: {e}</tool_error>"

        # Optional truncation to the first `head` lines. A non-positive or
        # non-numeric `head` falls through and the whole text is returned.
        if head is not None:
            try:
                num_lines = int(head)
                if num_lines > 0:
                    lines = text_content.splitlines(True)
                    return "".join(lines[:num_lines])
            except (ValueError, TypeError):
                # Invalid head value, ignore and proceed with normal logic.
                pass

        # if file_path.lower().endswith('.csv'):
        #     lines = text_content.splitlines(True)
        #     if len(lines) > 500:
        #         top_lines = lines[:250]
        #         bottom_lines = lines[-250:]
        #         omitted_count = len(lines) - 500
        #         text_content = "".join(top_lines) + f"\n... (中间省略了 {omitted_count} 行) ...\n" + "".join(bottom_lines)

        # Return the file contents.
        return text_content

    except PermissionError:
        return f"<tool_error>没有权限访问文件 '{file_path}'</tool_error>"
    except UnicodeDecodeError:
        # Update: the global UnicodeDecodeError message was made more generic.
        return f"<tool_error>文件 '{file_path}' 包含无法解码的字符 (UnicodeDecodeError)。</tool_error>"
    except Exception as e:
        return f"<tool_error>读取文件时发生错误: {e}</tool_error>"
|
193
|
-
|
194
|
-
# Manual smoke test: reads the first 10 lines of a hard-coded notebook path
# and prints the result followed by its length.
if __name__ == "__main__":
    # python -m beswarm.aient.aient.plugins.read_file
    result = read_file("./work/cax/Lenia Notebook.ipynb", head=10)
    print(result)
    print(len(result))
|
@@ -1,90 +0,0 @@
|
|
1
|
-
from .registry import register_tool
|
2
|
-
from ..utils.scripts import unescape_html
|
3
|
-
|
4
|
-
import os
|
5
|
-
|
6
|
-
# NOTE(review): the docstring is kept verbatim because it is presumably
# consumed by `register_tool` as the tool description — confirm before
# rewording it.
@register_tool()
def write_to_file(path, content, mode='w', newline=False):
    """
    ## write_to_file
    Description: Request to write full content to a file at the specified path. If the file exists, it will be overwritten with the provided content. If the file doesn't exist, it will be created. This tool will automatically create any directories needed to write the file.
    Parameters:
    - path: (required) The path of the file to write to (relative to the current working directory ${args.cwd})
    - content: (required) The content to write to the file. ALWAYS provide the COMPLETE intended content of the file, without any truncation or omissions. You MUST include ALL parts of the file, even if they haven't been modified. Do NOT include the line numbers in the content though, just the actual content of the file.
    - mode: (optional) The mode to write to the file. Default is 'w'. 'w' for write, 'a' for append.
    - newline: (optional) Whether to add a newline before the content. Default is False.
    Usage:
    <write_to_file>
    <path>File path here</path>
    <content>
    Your file content here
    </content>
    <mode>w</mode>
    <newline>False</newline>
    </write_to_file>

    Example: Requesting to write to frontend-config.json
    <write_to_file>
    <path>frontend-config.json</path>
    <content>
    {
    "apiEndpoint": "https://api.example.com",
    "theme": {
    "primaryColor": "#007bff",
    "secondaryColor": "#6c757d",
    "fontFamily": "Arial, sans-serif"
    },
    "features": {
    "darkMode": true,
    "notifications": true,
    "analytics": false
    },
    "version": "1.0.0"
    }
    </content>
    </write_to_file>
    """
    # Make sure the target directory exists. Failures here are reported the
    # same way as write failures instead of escaping as an exception.
    try:
        os.makedirs(os.path.dirname(path) or '.', exist_ok=True)
    except OSError as e:
        return f"<tool_error>写入文件失败: {e}</tool_error>"

    # The usage example passes the flag as text (<newline>False</newline>);
    # a non-empty string such as "False" would otherwise count as true.
    if isinstance(newline, str):
        newline = newline.strip().lower() not in ("", "false", "0", "no")

    # For Markdown/text files, pad content that opens with a heading or with
    # a "---" line by leading blank line(s).
    if content.startswith("##") and path.endswith((".md", ".txt")):
        content = "\n\n" + content

    if content.startswith("---\n") and path.endswith((".md", ".txt")):
        content = "\n" + content

    if newline:
        content = '\n' + content

    # Write the file. OSError covers PermissionError as well as related
    # failures such as the path being a directory.
    try:
        with open(path, mode, encoding='utf-8') as file:
            file.write(unescape_html(content))
    except OSError as e:
        return f"<tool_error>写入文件失败: {e}</tool_error>"

    return f"已成功写入文件:{path}"
|
67
|
-
|
68
|
-
|
69
|
-
# Manual smoke test: reads test.txt and writes its content back through
# write_to_file, printing the tool's return message.
if __name__ == "__main__":
    # Sample HTML fragment; it is not referenced by the statements below.
    text = """
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Continuous Thought Machines (CTM) 原理解读</title>
<script>MathJax={chtml:{fontURL:'https://cdn.jsdelivr.net/npm/mathjax@3/es5/output/chtml/fonts/woff-v2'}}</script>
<script src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml.js" id="MathJax-script" async></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/viz.js/2.1.2/viz.js" defer></script>
<script src="https://cdnjs.cloudflare.com/ajax/libs/viz.js/2.1.2/full.render.js" defer></script>
<script src="https://unpkg.com/@panzoom/panzoom@4.5.1/dist/panzoom.min.js" defer></script>
<link href="https://cdnjs.cloudflare.com/ajax/libs/prism/1.29.0/themes/prism-okaidia.min.css" rel="stylesheet"/>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=Fira+Code:wght@400;500&display=swap" rel="stylesheet">
<link href="https://fonts.googleapis.com/icon?family=Material+Icons+Outlined" rel="stylesheet">
<style>
"""
    with open("test.txt", "r", encoding="utf-8") as file:
        content = file.read()
    print(write_to_file("test.txt", content))
    # python -m beswarm.aient.aient.plugins.write_file
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|