pycoze 0.1.487__tar.gz → 0.1.489__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. {pycoze-0.1.487 → pycoze-0.1.489}/PKG-INFO +1 -1
  2. pycoze-0.1.489/pycoze/api/lib/web.py +9 -0
  3. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/bot/tools.py +1 -1
  4. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/utils/__init__.py +3 -1
  5. pycoze-0.1.489/pycoze/utils/web.py +56 -0
  6. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze.egg-info/PKG-INFO +1 -1
  7. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze.egg-info/SOURCES.txt +2 -1
  8. {pycoze-0.1.487 → pycoze-0.1.489}/setup.py +1 -1
  9. pycoze-0.1.487/pycoze/api/lib/web.py +0 -67
  10. {pycoze-0.1.487 → pycoze-0.1.489}/LICENSE +0 -0
  11. {pycoze-0.1.487 → pycoze-0.1.489}/README.md +0 -0
  12. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/__init__.py +0 -0
  13. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/ai/__init__.py +0 -0
  14. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/ai/llm/__init__.py +0 -0
  15. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/ai/llm/chat.py +0 -0
  16. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/ai/llm/text_to_image_prompt.py +0 -0
  17. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/api/__init__.py +0 -0
  18. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/api/lib/__init__.py +0 -0
  19. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/api/lib/tab.py +0 -0
  20. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/api/lib/view.py +0 -0
  21. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/api/lib/window.py +0 -0
  22. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/bot/__init__.py +0 -0
  23. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/bot/chat.py +0 -0
  24. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/bot/chat_base.py +0 -0
  25. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/bot/lib.py +0 -0
  26. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/bot/message.py +0 -0
  27. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/bot/prompt.md +0 -0
  28. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/reference/__init__.py +0 -0
  29. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/reference/bot.py +0 -0
  30. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/reference/fn.py +0 -0
  31. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/reference/lib.py +0 -0
  32. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/reference/tool.py +0 -0
  33. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/reference/workflow.py +0 -0
  34. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/ui/__init__.py +0 -0
  35. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/ui/base.py +0 -0
  36. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/ui/color.py +0 -0
  37. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/ui/typ.py +0 -0
  38. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/ui/ui_def.py +0 -0
  39. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/utils/arg.py +0 -0
  40. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/utils/env.py +0 -0
  41. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/utils/process.py +0 -0
  42. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/utils/socket.py +0 -0
  43. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze/utils/text_or_file.py +0 -0
  44. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze.egg-info/dependency_links.txt +0 -0
  45. {pycoze-0.1.487 → pycoze-0.1.489}/pycoze.egg-info/top_level.txt +0 -0
  46. {pycoze-0.1.487 → pycoze-0.1.489}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: pycoze
3
- Version: 0.1.487
3
+ Version: 0.1.489
4
4
  Summary: Package for pycoze only!
5
5
  Author: Yuan Jie Xiong
6
6
  Author-email: aiqqqqqqq@qq.com
@@ -0,0 +1,9 @@
1
+ from pycoze import utils
2
+ from bs4 import BeautifulSoup, Comment
3
+
4
+
5
+ socket = utils.socket
6
+
7
+
8
+ class WebCls:
9
+
@@ -117,7 +117,7 @@ class WriteFileTool(Tool):
117
117
  # 创建目录并写入或覆盖文件
118
118
  os.makedirs(os.path.dirname(path), exist_ok=True)
119
119
  with open(path, "w", encoding="utf-8") as f:
120
- f.write(self.params["content"].replace("@`@`@`@", "```")
120
+ f.write(self.params["content"].replace("@`@`@`@", "```"))
121
121
  return f"File [[{path}]] written successfully."
122
122
 
123
123
 
@@ -3,11 +3,13 @@ from .env import read_params_file, params, read_json_file
3
3
  from .socket import TcpSocket, socket, socket_subscribe
4
4
  from .text_or_file import to_text
5
5
  from .process import better_kill, execute_script, execute_script_and_block, execute_script_no_block
6
+ from .web import get_simplified_html
6
7
 
7
8
  __all__ = [
8
9
  read_arg,
9
10
  read_params_file, params, read_json_file,
10
11
  TcpSocket, socket, socket_subscribe,
11
12
  to_text,
12
- better_kill, execute_script, execute_script_and_block, execute_script_no_block
13
+ better_kill, execute_script, execute_script_and_block, execute_script_no_block,
14
+ get_simplified_html
13
15
  ]
@@ -0,0 +1,56 @@
1
+ from bs4 import BeautifulSoup, Comment
2
+
3
+ def get_simplified_html(html: str, selector=None) -> str:
4
+ soup = BeautifulSoup(html, 'html.parser')
5
+
6
+ # 如果指定了selector,则只提取该元素的内容
7
+ if selector:
8
+ element = soup.select_one(selector)
9
+ if element:
10
+ soup = BeautifulSoup(str(element), 'html.parser')
11
+ else:
12
+ return f"element not found: {selector}"
13
+
14
+ # 定义需要移除的标签
15
+ tags_to_remove = ['script', 'style', 'noscript', 'meta', 'link']
16
+ for tag in tags_to_remove:
17
+ for element in soup(tag):
18
+ element.decompose()
19
+
20
+ # 移除注释
21
+ for element in soup.find_all(string=lambda text: isinstance(text, Comment)):
22
+ element.extract()
23
+
24
+ # 定义需要保留的交互属性
25
+ INTERACTIVE_ATTRIBUTES = {
26
+ 'a': ['href', 'onclick'],
27
+ 'button': ['onclick'],
28
+ 'img': ['src', 'onload'],
29
+ 'form': ['action', 'onsubmit'],
30
+ 'input': ['type', 'onclick', 'onchange'],
31
+ '*': ['onclick', 'onload', 'onchange', 'onsubmit', 'onmouseover']
32
+ }
33
+
34
+ # 遍历所有标签,保留交互属性并移除其他属性
35
+ for element in soup.find_all(True):
36
+ tag_name = element.name
37
+ allowed_attrs = INTERACTIVE_ATTRIBUTES.get(tag_name, []) + INTERACTIVE_ATTRIBUTES['*']
38
+ attrs = list(element.attrs.keys())
39
+ for attr in attrs:
40
+ if attr not in allowed_attrs:
41
+ del element[attr]
42
+
43
+ # 如果是<img>标签,检查src是否为Base64
44
+ if tag_name == 'img' and 'src' in element.attrs and element['src'].startswith('data:'):
45
+ del element['src']
46
+
47
+ # 处理文本内容,超过1000字符则截取
48
+ if element.string and len(element.string) > 1000:
49
+ element.string = element.string[:1000] + '...'
50
+
51
+ # 移除标签之间的多余空白
52
+ for element in soup.find_all(True):
53
+ if not element.get_text(strip=True):
54
+ element.string = ''
55
+
56
+ return str(soup)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: pycoze
3
- Version: 0.1.487
3
+ Version: 0.1.489
4
4
  Summary: Package for pycoze only!
5
5
  Author: Yuan Jie Xiong
6
6
  Author-email: aiqqqqqqq@qq.com
@@ -39,4 +39,5 @@ pycoze/utils/arg.py
39
39
  pycoze/utils/env.py
40
40
  pycoze/utils/process.py
41
41
  pycoze/utils/socket.py
42
- pycoze/utils/text_or_file.py
42
+ pycoze/utils/text_or_file.py
43
+ pycoze/utils/web.py
@@ -2,7 +2,7 @@ from setuptools import setup, find_packages
2
2
 
3
3
  setup(
4
4
  name="pycoze",
5
- version="0.1.487",
5
+ version="0.1.489",
6
6
  packages=find_packages(),
7
7
  install_requires=[],
8
8
  author="Yuan Jie Xiong",
@@ -1,67 +0,0 @@
1
- from pycoze import utils
2
- from bs4 import BeautifulSoup, Comment
3
-
4
-
5
- socket = utils.socket
6
-
7
-
8
- class WebCls:
9
- def get_simplified_webpage(self, url: str) -> str:
10
- return socket.post_and_recv_result(
11
- "getSimplifiedWebpage", {"url": url}
12
- )
13
-
14
- def get_simplified_html(self, html: str, selector=None) -> str:
15
- soup = BeautifulSoup(html, 'html.parser')
16
-
17
- # 如果指定了selector,则只提取该元素的内容
18
- if selector:
19
- element = soup.select_one(selector)
20
- if element:
21
- soup = BeautifulSoup(str(element), 'html.parser')
22
- else:
23
- return f"element not found: {selector}"
24
-
25
- # 定义需要移除的标签
26
- tags_to_remove = ['script', 'style', 'noscript', 'meta', 'link']
27
- for tag in tags_to_remove:
28
- for element in soup(tag):
29
- element.decompose()
30
-
31
- # 移除注释
32
- for element in soup.find_all(string=lambda text: isinstance(text, Comment)):
33
- element.extract()
34
-
35
- # 定义需要保留的交互属性
36
- INTERACTIVE_ATTRIBUTES = {
37
- 'a': ['href', 'onclick'],
38
- 'button': ['onclick'],
39
- 'img': ['src', 'onload'],
40
- 'form': ['action', 'onsubmit'],
41
- 'input': ['type', 'onclick', 'onchange'],
42
- '*': ['onclick', 'onload', 'onchange', 'onsubmit', 'onmouseover']
43
- }
44
-
45
- # 遍历所有标签,保留交互属性并移除其他属性
46
- for element in soup.find_all(True):
47
- tag_name = element.name
48
- allowed_attrs = INTERACTIVE_ATTRIBUTES.get(tag_name, []) + INTERACTIVE_ATTRIBUTES['*']
49
- attrs = list(element.attrs.keys())
50
- for attr in attrs:
51
- if attr not in allowed_attrs:
52
- del element[attr]
53
-
54
- # 如果是<img>标签,检查src是否为Base64
55
- if tag_name == 'img' and 'src' in element.attrs and element['src'].startswith('data:'):
56
- del element['src']
57
-
58
- # 处理文本内容,超过1000字符则截取
59
- if element.string and len(element.string) > 1000:
60
- element.string = element.string[:1000] + '...'
61
-
62
- # 移除标签之间的多余空白
63
- for element in soup.find_all(True):
64
- if not element.get_text(strip=True):
65
- element.string = ''
66
-
67
- return str(soup)
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes