beswarm 0.1.51__py3-none-any.whl → 0.1.53__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
beswarm/aient/setup.py CHANGED
@@ -4,7 +4,7 @@ from setuptools import setup, find_packages
 
 setup(
     name="aient",
-    version="1.1.7",
+    version="1.1.9",
     description="Aient: The Awakening of Agent.",
     long_description=Path.open(Path("README.md"), encoding="utf-8").read(),
     long_description_content_type="text/markdown",
beswarm/aient/src/aient/plugins/websearch.py CHANGED
@@ -124,10 +124,10 @@ def jina_ai_Web_crawler(url: str, isSearch=False) -> str:
 @register_tool()
 def get_url_content(url: str) -> str:
     """
-    Fetch the web page content at the given URL and return it to the user as Markdown.
+    Fetch the web page content at the given URL and return it to the user as Markdown.
 
-    :param url: URL of the page to crawl
-    :return: the page content
+    :param url: URL of the page to crawl
+    :return: the page content
     """
     markdown_content = url_to_markdown(url)
     # print(markdown_content)
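The hunk above only reflows the docstring of get_url_content, but the function matters later: the new search_web tool fans out over it in worker threads. A minimal usage sketch, assuming the vendored import path shown in this wheel and a reachable target URL:

```python
# Hedged sketch: the import path mirrors the vendored layout in this wheel
# (beswarm/aient/src/aient/plugins); adjust it if the package is laid out differently.
from beswarm.aient.src.aient.plugins import get_url_content

markdown = get_url_content("https://example.com")  # page content rendered as Markdown
print(markdown[:500])
```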
beswarm/aient/src/aient/plugins/write_file.py CHANGED
@@ -1,6 +1,19 @@
 from .registry import register_tool
 
 import os
+import html
+
+def unescape_html(input_string: str) -> str:
+    """
+    Convert HTML entities in a string (e.g. &amp;) back to their original characters (e.g. &).
+
+    Args:
+        input_string: The input string containing HTML entities.
+
+    Returns:
+        The converted string.
+    """
+    return html.unescape(input_string)
 
 @register_tool()
 def write_to_file(path, content, mode='w'):
@@ -49,6 +62,30 @@ Example: Requesting to write to frontend-config.json
 
     # Write the file
     with open(path, mode, encoding='utf-8') as file:
-        file.write(content)
+        file.write(unescape_html(content))
 
-    return f"已成功写入文件:{path}"
+    return f"已成功写入文件:{path}"
+
+
+if __name__ == "__main__":
+    text = """
+<!DOCTYPE html>
+<html lang="zh-CN">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Continuous Thought Machines (CTM) 原理解读</title>
+    <script>MathJax={chtml:{fontURL:'https://cdn.jsdelivr.net/npm/mathjax@3/es5/output/chtml/fonts/woff-v2'}}</script>
+    <script src="https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-chtml.js" id="MathJax-script" async></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/viz.js/2.1.2/viz.js" defer></script>
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/viz.js/2.1.2/full.render.js" defer></script>
+    <script src="https://unpkg.com/@panzoom/panzoom@4.5.1/dist/panzoom.min.js" defer></script>
+    <link href="https://cdnjs.cloudflare.com/ajax/libs/prism/1.29.0/themes/prism-okaidia.min.css" rel="stylesheet"/>
+    <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=Fira+Code:wght@400;500&display=swap" rel="stylesheet">
+    <link href="https://fonts.googleapis.com/icon?family=Material+Icons+Outlined" rel="stylesheet">
+    <style>
+"""
+    with open("test.txt", "r", encoding="utf-8") as file:
+        content = file.read()
+    print(write_to_file("test.txt", content))
+    # python -m beswarm.aient.src.aient.plugins.write_file
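The functional change in write_to_file is the new unescape_html call: content is passed through html.unescape before it is written, so entity-escaped text such as &amp;, &lt; or &quot; is restored to literal characters. A minimal standalone sketch of that step (plain stdlib html, no plugin registry involved):

```python
import html

# Entity-escaped text such as a model might emit when quoting code or HTML.
content = "if a &lt; b &amp;&amp; flag: print(&quot;ok&quot;)"

print(html.unescape(content))
# -> if a < b && flag: print("ok")
```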
beswarm/tools/__init__.py CHANGED
@@ -6,6 +6,7 @@ from .UIworker import UIworker
 from .search_arxiv import search_arxiv
 from .repomap import get_code_repo_map
 from .click import find_and_click_element, scroll_screen
+from .search_web import search_web
 # Explicitly import the needed items from aient.plugins
 from ..aient.src.aient.plugins import (
     excute_command,
@@ -42,4 +43,5 @@ __all__ = [
     "scroll_screen",
     "register_tool",
     "UIworker",
+    "search_web",
 ]
beswarm/tools/search_web.py ADDED
@@ -0,0 +1,296 @@
+import re
+import os
+import json
+import httpx
+import threading
+
+from ..aient.src.aient.plugins import register_tool, get_url_content # Assuming a similar plugin structure
+
+class ThreadWithReturnValue(threading.Thread):
+    def run(self):
+        if self._target is not None:
+            self._return = self._target(*self._args, **self._kwargs)
+
+    def join(self):
+        super().join()
+        return self._return
+
+@register_tool()
+async def search_web(query: str):
+    """
+    Fetch Google search results.
+
+    Args:
+        query (str): The query string to search for on Google.
+
+    Returns:
+        dict: A dictionary with the search results, or error information if something went wrong.
+    """
+    api_key = os.environ.get('THORDATA_KEY')
+    if not api_key:
+        raise ValueError("THORDATA_KEY is not set in environment variables")
+
+    api_url = "https://scraperapi.thordata.com/request"
+    headers = {
+        "Content-Type": "application/json",
+        "Authorization": f"Bearer {api_key}" # API key taken from the THORDATA_KEY environment variable
+    }
+    payload = {
+        "url": f"https://www.google.com/search?q={query}"
+    }
+    results = []
+
+    try:
+        async with httpx.AsyncClient() as client:
+            response = await client.post(api_url, headers=headers, json=payload)
+            response.raise_for_status() # Raise HTTPStatusError on 4xx or 5xx status codes
+            results = response.json()
+    except httpx.HTTPStatusError as e:
+        return {
+            "error": f"HTTP error occurred: {e.response.status_code} - {e.response.text}",
+            "status_code": e.response.status_code
+        }
+    except httpx.RequestError as e:
+        return {
+            "error": f"An error occurred while requesting {e.request.url!r}: {e}",
+            "request_url": str(e.request.url)
+        }
+    except json.JSONDecodeError:
+        return {
+            "error": "Failed to decode JSON response from the API.",
+            "response_text": response.text if 'response' in locals() else "No response text available"
+        }
+    except Exception as e:
+        return {
+            "error": f"An unexpected error occurred: {str(e)}"
+        }
+
+    unique_urls = []
+    if "error" in results:
+        print(f"Error fetching search results for '{query}':")
+        print(json.dumps(results, indent=2, ensure_ascii=False))
+    else:
+        # print(f"Search results for '{query}':")
+        html_content = results.get("data", {}).get("result", {}).get("html", "")
+        if html_content:
+            # Find all URLs with regular expressions
+            # Import the html and urllib.parse modules
+            import html
+            import urllib.parse
+
+            # 1. Initial extraction of candidate URL strings
+            # Use a permissive regex that may keep trailing non-URL characters; clean them up afterwards
+            candidate_urls = re.findall(r'https?://[^\s"]+|www\.[^\s"]+', html_content)
+
+            processed_urls = []
+            for url_str in candidate_urls:
+                # 2. Decode hex escapes (e.g. \x26 -> &)
+                try:
+                    def replace_hex(match):
+                        return chr(int(match.group(1), 16))
+                    url_str = re.sub(r'\\x([0-9a-fA-F]{2})', replace_hex, url_str)
+                except ValueError:
+                    pass
+
+                # 3. Decode HTML entities (e.g. &amp; -> &)
+                url_str = html.unescape(url_str)
+
+                # 4. Decode URL percent-encoding (e.g. %3F -> ?, %3D -> =)
+                url_str = urllib.parse.unquote(url_str)
+
+                # 5. Truncate known non-URL parameters or patterns
+                # Cut off the ved= parameter
+                if 'ved=' in url_str:
+                    url_str = url_str.split('ved=', 1)[0]
+                    url_str = url_str.rstrip('&?') # Remove any trailing & or ? left behind
+
+                # 6. Iteratively strip trailing HTML tags
+                # e.g. </cite>, <div...>, </span></span>
+                old_url_len = -1
+                while old_url_len != len(url_str): # Loop until the string stops shrinking
+                    old_url_len = len(url_str)
+                    # Strip a complete closing tag at the end, e.g. </div>
+                    url_str = re.sub(r'</[^>]+>$', '', url_str)
+                    # Strip a trailing opening or incomplete tag, e.g. <cite or <div
+                    # (everything from the '<' to the end of the string)
+                    url_str = re.sub(r'<[^>]*$', '', url_str)
+                    # Strip a lone trailing '>' left over after tag removal
+                    url_str = url_str.rstrip('>')
+
+
+                # 7. Strip common trailing non-URL characters (quotes, punctuation)
+                # Order matters: this should run after the HTML tag removal
+                url_str = url_str.rstrip('\'";,.?!<>()[]{}') # '<' is checked again here
+
+                # 8. Strip a lone trailing '&' (after all other cleanup)
+                url_str = url_str.rstrip('&')
+                url_str = url_str.split("#:~:")[0]
+
+                if url_str: # Make sure the URL is not empty
+                    processed_urls.append(url_str)
+
+            # Domains to filter out
+            excluded_domains = [
+                "www.w3.org",
+                "www.google.com",
+                "ssl.gstatic.com",
+                "translate.google.com",
+                "www.googleadservices.com",
+                "gstatic.com",
+                "lens.google.com",
+                "schema.org",
+                "id.google.com",
+                "maps.google.com",
+                "clients6.google.com",
+                "ogs.google.com",
+                "policies.google.com",
+                "support.google.com",
+                "tpc.googlesyndication.com",
+                "adssettings.google.com"
+            ]
+
+            final_urls_before_dedup = []
+            for url in processed_urls:
+                if not url:
+                    continue
+                if not any(excluded_domain in url for excluded_domain in excluded_domains):
+                    # 9. Further normalization
+                    # 9a. Strip a trailing /
+                    normalized_url = url.rstrip('/')
+
+                    # 9b. Add a default protocol (https) if missing
+                    if normalized_url and not normalized_url.startswith(('http://', 'https://')):
+                        normalized_url = 'https://' + normalized_url
+
+                    if normalized_url:
+                        final_urls_before_dedup.append(normalized_url)
+
+            # 10. Deduplicate
+            temp_unique_urls_set = set(final_urls_before_dedup)
+            temp_unique_urls_set.discard("https://baike.baidu.com")
+            temp_unique_urls_set.discard("https://zhuanlan.zhihu.com")
+            unique_urls = sorted(list(temp_unique_urls_set))
+
+    results = unique_urls
+    if not results:
+        return "No search results returned or results list is empty."
+
+    web_contents_raw = []
+    if results and isinstance(results, list) and len(results) > 0:
+        # print(f"Fetching content for {len(results)} URLs...")
+        # threads = []
+        # for url in url_set_list:
+        #     # url_search_thread = ThreadWithReturnValue(target=jina_ai_Web_crawler, args=(url,True,))
+        #     url_search_thread = ThreadWithReturnValue(target=get_url_content, args=(url,))
+        #     # url_search_thread = ThreadWithReturnValue(target=Web_crawler, args=(url,True,))
+        #     url_search_thread.start()
+        #     threads.append(url_search_thread)
+        threads = []
+        for i, link in enumerate(results):
+            print(f"Processing URL {i + 1}/{len(results)}: {link}")
+            # Assuming get_url_content is synchronous and returns a string or None
+            # content_text = get_url_content(link)
+            url_search_thread = ThreadWithReturnValue(target=get_url_content, args=(link,))
+            url_search_thread.start()
+            threads.append(url_search_thread)
+
+        for thread in threads:
+            content_text = thread.join()
+            # content_text = thread.get_result()
+            if content_text and len(content_text.split("\n\n")) > 10: # Ensure content_text is not None or empty before adding
+                web_contents_raw.append({"url": link, "content": str(content_text)}) # Ensure content is string
+            else:
+                print(f"Warning: Failed to get content or content was empty for URL: {link}")
+    elif not results or (isinstance(results, list) and len(results) == 0) :
+        print("No search results returned or results list is empty.")
+    else:
+        print(f"Search results in unexpected format: {type(results)}")
+
+    # print(f"Fetched {len(web_contents_raw)} web contents with text.")
+
+    if not web_contents_raw:
+        return "No web content"
+    # if not web_contents_raw:
+    #     print("No web content with text to process for similarity.")
+    #     output_filename = "web_content_filtered.json"
+    #     with open(output_filename, "w", encoding="utf-8") as f:
+    #         json.dump([], f, indent=2, ensure_ascii=False)
+    #     print(f"Empty list saved to {output_filename}")
+    #     return
+
+    # output_filename = "web_content.json"
+    # with open(output_filename, "w", encoding="utf-8") as f:
+    #     json.dump(web_contents_raw, f, indent=2, ensure_ascii=False)
+
+    n = len(web_contents_raw)
+    to_keep_flags = [True] * n # Flags to mark which items to keep
+
+    # print("Starting similarity comparison...")
+    for i in range(n):
+        if not to_keep_flags[i]: # Skip if item i is already marked for discard
+            continue
+
+        content_i = web_contents_raw[i].get('content', "")
+        if not isinstance(content_i, str):
+            content_i = str(content_i) # Fallback, though str(content_text) above should handle it
+
+        for j in range(i + 1, n):
+            if not to_keep_flags[j]: # Skip if item j is already marked for discard
+                continue
+
+            content_j = web_contents_raw[j].get('content', "")
+            if not isinstance(content_j, str):
+                content_j = str(content_j) # Fallback
+
+            similarity = calculate_similarity(content_i, content_j)
+
+            if similarity > 0.9:
+                # print(f"Similarity > 0.9 ({similarity:.4f}) between content from '{web_contents_raw[i]['url']}' and '{web_contents_raw[j]['url']}'. Discarding the latter.")
+                to_keep_flags[j] = False # Discard the second item (item j)
+
+    final_web_content = [web_contents_raw[i] for i in range(n) if to_keep_flags[i]]
+    # print(f"Number of items after filtering: {len(final_web_content)}")
+
+    # output_filename = "web_content_filtered.json"
+    # with open(output_filename, "w", encoding="utf-8") as f:
+    #     json.dump(final_web_content, f, indent=2, ensure_ascii=False)
+    # print(f"Filtered web content saved to {output_filename}")
+    final_result = ""
+    for item in final_web_content:
+        final_result += item["content"]
+        final_result += "\n\n"
+    if not final_result:
+        return "No web content"
+    return final_result
+
+import difflib
+
+
+def calculate_similarity(string1: str, string2: str) -> float:
+    """Calculates the similarity ratio between two strings.
+
+    Args:
+        string1: The first string.
+        string2: The second string.
+
+    Returns:
+        A float between 0 and 1, where 1 means the strings are identical
+        and 0 means they are completely different.
+    """
+    return difflib.SequenceMatcher(None, string1, string2).ratio()
+
+if __name__ == '__main__':
+    import asyncio
+    import re
+
+    async def main():
+        # Example usage
+        search_query = "美国"
+        print(f"Performing web search for: '{search_query}'")
+        results = await search_web(search_query) # results is the aggregated page content
+
+        print(results)
+
+    asyncio.run(main())
+
+    # python -m beswarm.tools.search_web
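A detail that is easy to miss in the listing above is the near-duplicate filter: after fetching pages, search_web discards any page whose content has a difflib.SequenceMatcher ratio above 0.9 with an earlier page. A standalone sketch of that filter with made-up records (the URLs and strings are illustrative only):

```python
import difflib

def calculate_similarity(string1: str, string2: str) -> float:
    """Similarity ratio in [0, 1]; 1.0 means identical strings."""
    return difflib.SequenceMatcher(None, string1, string2).ratio()

# Toy stand-ins for the {"url": ..., "content": ...} records built by search_web.
pages = [
    {"url": "https://a.example", "content": "Continuous Thought Machines explained, step by step."},
    {"url": "https://b.example", "content": "Continuous Thought Machines explained, step by step!"},
    {"url": "https://c.example", "content": "An unrelated article about packaging Python wheels."},
]

keep = [True] * len(pages)
for i in range(len(pages)):
    if not keep[i]:
        continue
    for j in range(i + 1, len(pages)):
        if keep[j] and calculate_similarity(pages[i]["content"], pages[j]["content"]) > 0.9:
            keep[j] = False  # the later near-duplicate is discarded

print([p["url"] for p, kept in zip(pages, keep) if kept])
# -> ['https://a.example', 'https://c.example']
```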
beswarm-{0.1.51 → 0.1.53}.dist-info/METADATA RENAMED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: beswarm
-Version: 0.1.51
+Version: 0.1.53
 Summary: MAS
 Requires-Python: >=3.11
 Description-Content-Type: text/markdown
beswarm-{0.1.51 → 0.1.53}.dist-info/RECORD RENAMED
@@ -1,7 +1,7 @@
 beswarm/__init__.py,sha256=HZjUOJtZR5QhMuDbq-wukQQn1VrBusNWai_ysGo-VVI,20
 beswarm/utils.py,sha256=AdDCcqAIIKQEMl7PfryVgeT9G5sHe7QNsZnrvmTGA8E,283
 beswarm/aient/main.py,sha256=SiYAIgQlLJqYusnTVEJOx1WNkSJKMImhgn5aWjfroxg,3814
-beswarm/aient/setup.py,sha256=VK80NYZ8fNHRLYh2gQe-kBWCY489rUmZsqy7VHpAZAA,486
+beswarm/aient/setup.py,sha256=7hYwy55_Ncx42ns6TQjOyhMBMIFkFdbSWupWV7K00vQ,486
 beswarm/aient/src/aient/__init__.py,sha256=SRfF7oDVlOOAi6nGKiJIUK6B_arqYLO9iSMp-2IZZps,21
 beswarm/aient/src/aient/core/__init__.py,sha256=NxjebTlku35S4Dzr16rdSqSTWUvvwEeACe8KvHJnjPg,34
 beswarm/aient/src/aient/core/log_config.py,sha256=kz2_yJv1p-o3lUQOwA3qh-LSc3wMHv13iCQclw44W9c,274
@@ -32,8 +32,8 @@ beswarm/aient/src/aient/plugins/list_directory.py,sha256=5ubm-mfrj-tanGSDp4M_Tmb
 beswarm/aient/src/aient/plugins/read_file.py,sha256=-RRmaj-rSl8y--5VKnxCsZ1YQHe75OhnqvsDRLJyujM,8412
 beswarm/aient/src/aient/plugins/registry.py,sha256=YknzhieU_8nQ3oKlUSSWDB4X7t2Jx0JnqT2Jd9Xsvfk,3574
 beswarm/aient/src/aient/plugins/run_python.py,sha256=dgcUwBunMuDkaSKR5bToudVzSdrXVewktDDFUz_iIOQ,4589
-beswarm/aient/src/aient/plugins/websearch.py,sha256=I4tYU7CGLdyG6Hd3yK19V-PoG5IbFI9FEEVggyrshRg,15227
-beswarm/aient/src/aient/plugins/write_file.py,sha256=qmT6iQ3mDyVAa9Sld1jfJq0KPZj0w2kRIHq0JyjpGeA,1853
+beswarm/aient/src/aient/plugins/websearch.py,sha256=llxy1U0vJiNMiKvamMr4p7IruLb3nnDR4YErz8TYimc,15215
+beswarm/aient/src/aient/plugins/write_file.py,sha256=YRvQKMvV-5lwohxlvwt9hjfxz2dRJP85AJWAMUIqbBY,3804
 beswarm/aient/src/aient/prompt/__init__.py,sha256=GBtn6-JDT8KHFCcuPpfSNE_aGddg5p4FEyMCy4BfwGs,20
 beswarm/aient/src/aient/prompt/agent.py,sha256=6f5ZB66Rb8y0iQScHMRhvXZ1qMM3YsKpCBPCTAAw2rg,24917
 beswarm/aient/src/aient/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -119,15 +119,16 @@ beswarm/queries/tree-sitter-languages/rust-tags.scm,sha256=9ljM1nzhfPs_ZTRw7cr2P
 beswarm/queries/tree-sitter-languages/scala-tags.scm,sha256=UxQjz80JIrrJ7Pm56uUnQyThfmQNvwk7aQzPNypB-Ao,1761
 beswarm/queries/tree-sitter-languages/typescript-tags.scm,sha256=OMdCeedPiA24ky82DpgTMKXK_l2ySTuF2zrQ2fJAi9E,1253
 beswarm/tools/UIworker.py,sha256=YRrzW5GxWqA-tcmmm2c6mMbkVI0kHIqosIUz-GcoQOQ,6339
-beswarm/tools/__init__.py,sha256=-h_zoMEjnLCg9iVgSoor9BI3yK64LdVOQkVB0DgGFmo,1001
+beswarm/tools/__init__.py,sha256=EKOiLDGDrJ5GPM31SYtsYzDGSri_EINnO8M9ud0BifU,1054
 beswarm/tools/click.py,sha256=TygaekCXTmU3fIu6Uom7ZcyzEgYMlCC_GX-5SmWHuLI,20762
 beswarm/tools/edit_file.py,sha256=hfpLaE4ekDiAya0Le0fJuYa-xUefWHLTxc3F6zGZd7M,6912
 beswarm/tools/planner.py,sha256=lguBCS6kpwNPoXQvqH-WySabVubT82iyWOkJnjt6dXw,1265
 beswarm/tools/repomap.py,sha256=CwvwoN5Swr42EzrORTTeV8MMb7mPviy4a4b0fxBu50k,40828
 beswarm/tools/search_arxiv.py,sha256=9slwBemXjEqrd7-YgVmyMijPXlkhZCybEDRVhWVQ9B0,7937
+beswarm/tools/search_web.py,sha256=B24amOnGHnmdV_6S8bw8O2PdhZRRIDtJjg-wXcfP7dQ,11859
 beswarm/tools/think.py,sha256=WLw-7jNIsnS6n8MMSYUin_f-BGLENFmnKM2LISEp0co,1760
 beswarm/tools/worker.py,sha256=FfKCx7KFNbMRoAXtjU1_nJQjx9WHny7KBq8OXSYICJs,5334
-beswarm-0.1.51.dist-info/METADATA,sha256=MRqRv-NZv77ggRrhlNPiCoD8yFQES8iUyJdb1mNi_vA,3537
-beswarm-0.1.51.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
-beswarm-0.1.51.dist-info/top_level.txt,sha256=pJw4O87wvt5882smuSO6DfByJz7FJ8SxxT8h9fHCmpo,8
-beswarm-0.1.51.dist-info/RECORD,,
+beswarm-0.1.53.dist-info/METADATA,sha256=N0kOlCXH6zGdg6F7a44dI1Ao9hUmi5Gt6fDJ6Kz699Q,3537
+beswarm-0.1.53.dist-info/WHEEL,sha256=Nw36Djuh_5VDukK0H78QzOX-_FQEo6V37m3nkm96gtU,91
+beswarm-0.1.53.dist-info/top_level.txt,sha256=pJw4O87wvt5882smuSO6DfByJz7FJ8SxxT8h9fHCmpo,8
+beswarm-0.1.53.dist-info/RECORD,,