beswarm-0.1.12-py3-none-any.whl → beswarm-0.1.13-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (75)
  1. beswarm/aient/main.py +50 -0
  2. beswarm/aient/setup.py +15 -0
  3. beswarm/aient/src/aient/__init__.py +1 -0
  4. beswarm/aient/src/aient/core/__init__.py +1 -0
  5. beswarm/aient/src/aient/core/log_config.py +6 -0
  6. beswarm/aient/src/aient/core/models.py +232 -0
  7. beswarm/aient/src/aient/core/request.py +1665 -0
  8. beswarm/aient/src/aient/core/response.py +617 -0
  9. beswarm/aient/src/aient/core/test/test_base_api.py +18 -0
  10. beswarm/aient/src/aient/core/test/test_image.py +15 -0
  11. beswarm/aient/src/aient/core/test/test_payload.py +92 -0
  12. beswarm/aient/src/aient/core/utils.py +715 -0
  13. beswarm/aient/src/aient/models/__init__.py +9 -0
  14. beswarm/aient/src/aient/models/audio.py +63 -0
  15. beswarm/aient/src/aient/models/base.py +251 -0
  16. beswarm/aient/src/aient/models/chatgpt.py +938 -0
  17. beswarm/aient/src/aient/models/claude.py +640 -0
  18. beswarm/aient/src/aient/models/duckduckgo.py +241 -0
  19. beswarm/aient/src/aient/models/gemini.py +357 -0
  20. beswarm/aient/src/aient/models/groq.py +268 -0
  21. beswarm/aient/src/aient/models/vertex.py +420 -0
  22. beswarm/aient/src/aient/plugins/__init__.py +33 -0
  23. beswarm/aient/src/aient/plugins/arXiv.py +48 -0
  24. beswarm/aient/src/aient/plugins/config.py +172 -0
  25. beswarm/aient/src/aient/plugins/excute_command.py +35 -0
  26. beswarm/aient/src/aient/plugins/get_time.py +19 -0
  27. beswarm/aient/src/aient/plugins/image.py +72 -0
  28. beswarm/aient/src/aient/plugins/list_directory.py +50 -0
  29. beswarm/aient/src/aient/plugins/read_file.py +79 -0
  30. beswarm/aient/src/aient/plugins/registry.py +116 -0
  31. beswarm/aient/src/aient/plugins/run_python.py +156 -0
  32. beswarm/aient/src/aient/plugins/websearch.py +394 -0
  33. beswarm/aient/src/aient/plugins/write_file.py +51 -0
  34. beswarm/aient/src/aient/prompt/__init__.py +1 -0
  35. beswarm/aient/src/aient/prompt/agent.py +280 -0
  36. beswarm/aient/src/aient/utils/__init__.py +0 -0
  37. beswarm/aient/src/aient/utils/prompt.py +143 -0
  38. beswarm/aient/src/aient/utils/scripts.py +721 -0
  39. beswarm/aient/test/chatgpt.py +161 -0
  40. beswarm/aient/test/claude.py +32 -0
  41. beswarm/aient/test/test.py +2 -0
  42. beswarm/aient/test/test_API.py +6 -0
  43. beswarm/aient/test/test_Deepbricks.py +20 -0
  44. beswarm/aient/test/test_Web_crawler.py +262 -0
  45. beswarm/aient/test/test_aiwaves.py +25 -0
  46. beswarm/aient/test/test_aiwaves_arxiv.py +19 -0
  47. beswarm/aient/test/test_ask_gemini.py +8 -0
  48. beswarm/aient/test/test_class.py +17 -0
  49. beswarm/aient/test/test_claude.py +23 -0
  50. beswarm/aient/test/test_claude_zh_char.py +26 -0
  51. beswarm/aient/test/test_ddg_search.py +50 -0
  52. beswarm/aient/test/test_download_pdf.py +56 -0
  53. beswarm/aient/test/test_gemini.py +97 -0
  54. beswarm/aient/test/test_get_token_dict.py +21 -0
  55. beswarm/aient/test/test_google_search.py +35 -0
  56. beswarm/aient/test/test_jieba.py +32 -0
  57. beswarm/aient/test/test_json.py +65 -0
  58. beswarm/aient/test/test_langchain_search_old.py +235 -0
  59. beswarm/aient/test/test_logging.py +32 -0
  60. beswarm/aient/test/test_ollama.py +55 -0
  61. beswarm/aient/test/test_plugin.py +16 -0
  62. beswarm/aient/test/test_py_run.py +26 -0
  63. beswarm/aient/test/test_requests.py +162 -0
  64. beswarm/aient/test/test_search.py +18 -0
  65. beswarm/aient/test/test_tikitoken.py +19 -0
  66. beswarm/aient/test/test_token.py +94 -0
  67. beswarm/aient/test/test_url.py +33 -0
  68. beswarm/aient/test/test_whisper.py +14 -0
  69. beswarm/aient/test/test_wildcard.py +20 -0
  70. beswarm/aient/test/test_yjh.py +21 -0
  71. {beswarm-0.1.12.dist-info → beswarm-0.1.13.dist-info}/METADATA +1 -1
  72. beswarm-0.1.13.dist-info/RECORD +131 -0
  73. beswarm-0.1.12.dist-info/RECORD +0 -61
  74. {beswarm-0.1.12.dist-info → beswarm-0.1.13.dist-info}/WHEEL +0 -0
  75. {beswarm-0.1.12.dist-info → beswarm-0.1.13.dist-info}/top_level.txt +0 -0
beswarm/aient/test/chatgpt.py
@@ -0,0 +1,161 @@
+function_call_list = \
+{
+    "base": {
+        "tools": [],
+        "tool_choice": "auto"
+    },
+    "current_weather": {
+        "name": "get_current_weather",
+        "description": "Get the current weather in a given location",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "location": {
+                    "type": "string",
+                    "description": "The city and state, e.g. San Francisco, CA"
+                },
+                "unit": {
+                    "type": "string",
+                    "enum": [
+                        "celsius",
+                        "fahrenheit"
+                    ]
+                }
+            },
+            "required": [
+                "location"
+            ]
+        }
+    },
+    "SEARCH": {
+        "name": "get_search_results",
+        "description": "Search Google to enhance knowledge.",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "prompt": {
+                    "type": "string",
+                    "description": "The prompt to search."
+                }
+            },
+            "required": [
+                "prompt"
+            ]
+        }
+    },
+    "URL": {
+        "name": "get_url_content",
+        "description": "Get the webpage content of a URL",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "url": {
+                    "type": "string",
+                    "description": "the URL to request"
+                }
+            },
+            "required": [
+                "url"
+            ]
+        }
+    },
+    "DATE": {
+        "name": "get_time",
+        "description": "Get the current time, date, and day of the week",
+    },
+    "VERSION": {
+        "name": "get_version_info",
+        "description": "Get version information",
+    },
+    "TARVEL": {
+        "name": "get_city_tarvel_info",
+        "description": "Get the city's travel plan by city name.",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "city": {
+                    "type": "string",
+                    "description": "the city to search"
+                }
+            },
+            "required": [
+                "city"
+            ]
+        }
+    },
+    "IMAGE": {
+        "name": "generate_image",
+        "description": "Generate images based on user descriptions.",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "prompt": {
+                    "type": "string",
+                    "description": "the prompt to generate image"
+                }
+            },
+            "required": [
+                "prompt"
+            ]
+        }
+    },
+    "CODE": {
+        "name": "run_python_script",
+        "description": "Convert the string to a Python script and return the Python execution result. Assign the result to the variable result. The results must be printed to the console using the print function. Directly output the code, without using quotation marks or other symbols to enclose the code.",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "prompt": {
+                    "type": "string",
+                    "description": "the code to run"
+                }
+            },
+            "required": [
+                "prompt"
+            ]
+        }
+    },
+    "ARXIV": {
+        "name": "download_read_arxiv_pdf",
+        "description": "Get the content of the paper corresponding to the arXiv ID",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "prompt": {
+                    "type": "string",
+                    "description": "the arXiv ID of the paper"
+                }
+            },
+            "required": [
+                "prompt"
+            ]
+        }
+    },
+    "FLIGHT": {
+        "name": "get_Round_trip_flight_price",
+        "description": "Get round-trip ticket prices between two cities for the next six months. Use two city names as parameters. The name of the citys must be in Chinese.",
+        "parameters": {
+            "type": "object",
+            "properties": {
+                "departcity": {
+                    "type": "string",
+                    "description": "the chinese name of departure city. e.g. 上海"
+                },
+                "arrivalcity": {
+                    "type": "string",
+                    "description": "the chinese name of arrival city. e.g. 北京"
+                }
+            },
+            "required": [
+                "departcity",
+                "arrivalcity"
+            ]
+        }
+    },
+}
+
+
+if __name__ == "__main__":
+    import json
+    tools_list = {"tools": [{"type": "function", "function": function_call_list[key]} for key in function_call_list.keys() if key != "base"]}
+    print(json.dumps(tools_list, indent=4, ensure_ascii=False))
beswarm/aient/test/claude.py
@@ -0,0 +1,32 @@
+# import os
+# import sys
+# print(os.path.dirname(os.path.abspath(__file__)))
+# sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from .chatgpt import function_call_list
+def gpt2claude_tools_json(json_dict):
+    import copy
+    json_dict = copy.deepcopy(json_dict)
+    keys_to_change = {
+        "parameters": "input_schema",
+    }
+    for old_key, new_key in keys_to_change.items():
+        if old_key in json_dict:
+            if new_key:
+                json_dict[new_key] = json_dict.pop(old_key)
+            else:
+                json_dict.pop(old_key)
+        else:
+            if new_key and "description" in json_dict.keys():
+                json_dict[new_key] = {
+                    "type": "object",
+                    "properties": {}
+                }
+    if "tools" in json_dict.keys():
+        json_dict["tool_choice"] = {
+            "type": "auto"
+        }
+    return json_dict
+
+claude_tools_list = {f"{key}": gpt2claude_tools_json(function_call_list[key]) for key in function_call_list.keys()}
+if __name__ == "__main__":
+    print(claude_tools_list)
beswarm/aient/test/test.py
@@ -0,0 +1,2 @@
+a = "v1"
+print(a.split("v1"))
beswarm/aient/test/test_API.py
@@ -0,0 +1,6 @@
+def replace_with_asterisk(string, start=15, end=40):
+    return string[:start] + '*' * (end - start) + string[end:]
+
+original_string = "sk-zIuWeeuWY8vNCVhhHCXLroNmA6QhBxnv0ARMFcODVQwwqGRg"
+result = replace_with_asterisk(original_string)
+print(result)
beswarm/aient/test/test_Deepbricks.py
@@ -0,0 +1,20 @@
+import os
+from datetime import datetime
+
+from aient.models import chatgpt
+from aient.utils import prompt
+
+API = os.environ.get('API', None)
+API_URL = os.environ.get('API_URL', None)
+GPT_ENGINE = os.environ.get('GPT_ENGINE', 'gpt-4o')
+LANGUAGE = os.environ.get('LANGUAGE', 'Simplified Chinese')
+
+current_date = datetime.now()
+Current_Date = current_date.strftime("%Y-%m-%d")
+
+systemprompt = os.environ.get('SYSTEMPROMPT', prompt.system_prompt.format(LANGUAGE, Current_Date))
+
+bot = chatgpt(api_key=API, api_url=API_URL, engine=GPT_ENGINE, system_prompt=systemprompt)
+# for text in bot.ask_stream("你好"):
+for text in bot.ask_stream("arXiv:2311.17132 讲了什么?"):
+    print(text, end="")
beswarm/aient/test/test_Web_crawler.py
@@ -0,0 +1,262 @@
+import re
+import os
+os.system('cls' if os.name == 'nt' else 'clear')
+import time
+import requests
+from bs4 import BeautifulSoup
+
+def Web_crawler(url: str, isSearch=False) -> str:
+    """返回链接网址url正文内容,必须是合法的网址"""
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
+    }
+    result = ''
+    try:
+        requests.packages.urllib3.disable_warnings()
+        response = requests.get(url, headers=headers, verify=False, timeout=3, stream=True)
+        if response.status_code == 404:
+            print("Page not found:", url)
+            return ""
+            # return "抱歉,网页不存在,目前无法访问该网页。@Trash@"
+        content_length = int(response.headers.get('Content-Length', 0))
+        if content_length > 5000000:
+            print("Skipping large file:", url)
+            return result
+        try:
+            soup = BeautifulSoup(response.text.encode(response.encoding), 'xml', from_encoding='utf-8')
+        except:
+            soup = BeautifulSoup(response.text.encode(response.encoding), 'html.parser', from_encoding='utf-8')
+        # print("soup", soup)
+
+        for script in soup(["script", "style"]):
+            script.decompose()
+
+        table_contents = ""
+        tables = soup.find_all('table')
+        for table in tables:
+            table_contents += table.get_text()
+            table.decompose()
+
+        # body_text = "".join(soup.find('body').get_text().split('\n'))
+        body = soup.find('body')
+        if body:
+            body_text = body.get_text(separator=' ', strip=True)
+        else:
+            body_text = soup.get_text(separator=' ', strip=True)
+
+        result = table_contents + body_text
+        if result == '' and not isSearch:
+            result = ""
+            # result = "抱歉,可能反爬虫策略,目前无法访问该网页。@Trash@"
+        if result.count("\"") > 1000:
+            result = ""
+    except Exception as e:
+        print('\033[31m')
+        print("error: url", url)
+        print("error", e)
+        print('\033[0m')
+        result = "抱歉,目前无法访问该网页。"
+    # print("url content", result + "\n\n")
+    print(result)
+    return result
+
+import lxml.html
+from lxml.html.clean import Cleaner
+import httpx
+def get_body(url):
+    body = lxml.html.fromstring(httpx.get(url).text).xpath('//body')[0]
+    body = Cleaner(javascript=True, style=True).clean_html(body)
+    return ''.join(lxml.html.tostring(c, encoding='unicode') for c in body)
+
+import re
+import httpx
+import lxml.html
+from lxml.html.clean import Cleaner
+from html2text import HTML2Text
+from textwrap import dedent
+
+def url_to_markdown(url):
+    # 获取并清理网页内容
+    def get_body(url):
+        try:
+            text = httpx.get(url, verify=False, timeout=5).text
+            if text == "":
+                return "抱歉,目前无法访问该网页。"
+            # body = lxml.html.fromstring(text).xpath('//body')
+
+            doc = lxml.html.fromstring(text)
+            # 检查是否是GitHub raw文件格式(body > pre)
+            if doc.xpath('//body/pre'):
+                return text  # 直接返回原始文本,保留格式
+
+            body = doc.xpath('//body')
+            if body == [] and text != "":
+                body = text
+                return f'<pre>{body}</pre>'
+                # return body
+            else:
+                body = body[0]
+            body = Cleaner(javascript=True, style=True).clean_html(body)
+            return ''.join(lxml.html.tostring(c, encoding='unicode') for c in body)
+        except Exception as e:
+            print('\033[31m')
+            print("error: url", url)
+            print("error", e)
+            print('\033[0m')
+            return "抱歉,目前无法访问该网页。"
+
+    # 将HTML转换为Markdown
+    def get_md(cts):
+        h2t = HTML2Text(bodywidth=5000)
+        h2t.ignore_links = True
+        h2t.mark_code = True
+        h2t.ignore_images = True
+        res = h2t.handle(cts)
+
+        def _f(m):
+            return f'```\n{dedent(m.group(1))}\n```'
+
+        return re.sub(r'\[code]\s*\n(.*?)\n\[/code]', _f, res or '', flags=re.DOTALL).strip()
+
+    # 获取网页内容
+    body_content = get_body(url)
+
+    # 转换为Markdown
+    markdown_content = get_md(body_content)
+
+    return markdown_content
+
+def jina_ai_Web_crawler(url: str, isSearch=False) -> str:
+    """返回链接网址url正文内容,必须是合法的网址"""
+    headers = {
+        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
+    }
+    result = ''
+    try:
+        requests.packages.urllib3.disable_warnings()
+        url = "https://r.jina.ai/" + url
+        response = requests.get(url, headers=headers, verify=False, timeout=5, stream=True)
+        if response.status_code == 404:
+            print("Page not found:", url)
+            return "抱歉,网页不存在,目前无法访问该网页。@Trash@"
+        content_length = int(response.headers.get('Content-Length', 0))
+        if content_length > 5000000:
+            print("Skipping large file:", url)
+            return result
+        soup = BeautifulSoup(response.text.encode(response.encoding), 'lxml', from_encoding='utf-8')
+        table_contents = ""
+        tables = soup.find_all('table')
+        for table in tables:
+            table_contents += table.get_text()
+            table.decompose()
+        body = "".join(soup.find('body').get_text().split('\n'))
+        result = table_contents + body
+        if result == '' and not isSearch:
+            result = "抱歉,可能反爬虫策略,目前无法访问该网页。@Trash@"
+        if result.count("\"") > 1000:
+            result = ""
+    except Exception as e:
+        print('\033[31m')
+        print("error: url", url)
+        print("error", e)
+        print('\033[0m')
+        result = "抱歉,目前无法访问该网页。"
+    print(result + "\n\n")
+    return result
+
+
+def get_url_content(url: str) -> str:
+    """
+    比较 url_to_markdown 和 jina_ai_Web_crawler 的结果,选择更好的内容
+
+    :param url: 要爬取的网页URL
+    :return: 选择的更好的内容
+    """
+    markdown_content = url_to_markdown(url)
+    print(markdown_content)
+    print('-----------------------------')
+    jina_content = jina_ai_Web_crawler(url)
+    print('-----------------------------')
+
+    # 定义评分函数
+    def score_content(content):
+        # 1. 内容长度
+        length_score = len(content)
+
+        # 2. 是否包含错误信息
+        error_penalty = 1000 if "抱歉" in content or "@Trash@" in content else 0
+
+        # 3. 内容的多样性(可以通过不同类型的字符来粗略估计)
+        diversity_score = len(set(content))
+
+        # 4. 特殊字符比例(过高可能意味着格式问题)
+        special_char_ratio = len(re.findall(r'[^a-zA-Z0-9\u4e00-\u9fff\s]', content)) / len(content)
+        special_char_penalty = 500 if special_char_ratio > 0.1 else 0
+
+        return length_score + diversity_score - error_penalty - special_char_penalty
+
+    if markdown_content == "":
+        markdown_score = -2000
+    else:
+        markdown_score = score_content(markdown_content)
+    if jina_content == "":
+        jina_score = -2000
+    else:
+        jina_score = score_content(jina_content)
+
+    print(f"url_to_markdown 得分: {markdown_score}")
+    print(f"jina_ai_Web_crawler 得分: {jina_score}")
+
+    if markdown_score > jina_score:
+        print("选择 url_to_markdown 的结果")
+        return markdown_content
+    elif markdown_score == jina_score and jina_score < 0:
+        print("两者都无法访问")
+        return ""
+    else:
+        print("选择 jina_ai_Web_crawler 的结果")
+        return jina_content
+
+start_time = time.time()
+# for url in ['https://www.zhihu.com/question/557257320', 'https://job.achi.idv.tw/2021/12/05/what-is-the-403-forbidden-error-how-to-fix-it-8-methods-explained/', 'https://www.lifewire.com/403-forbidden-error-explained-2617989']:
+# for url in ['https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/403']:
+# for url in ['https://www.hostinger.com/tutorials/what-is-403-forbidden-error-and-how-to-fix-it']:
+# for url in ['https://beebom.com/what-is-403-forbidden-error-how-to-fix/']:
+# for url in ['https://www.lifewire.com/403-forbidden-error-explained-2617989']:
+# for url in ['https://www.usnews.com/news/best-countries/articles/2022-02-24/explainer-why-did-russia-invade-ukraine']:
+# for url in ['https://github.com/EAimTY/tuic']:
+# TODO 没办法访问
+# for url in ['https://s.weibo.com/top/summary?cate=realtimehot']:
+# for url in ['https://www.microsoft.com/en-us/security/blog/2023/05/24/volt-typhoon-targets-us-critical-infrastructure-with-living-off-the-land-techniques/']:
+# for url in ['https://tophub.today/n/KqndgxeLl9']:
+# for url in ['https://support.apple.com/zh-cn/HT213931']:
+# for url in ["https://zeta.zeabur.app"]:
+# for url in ["https://www.anthropic.com/research/probes-catch-sleeper-agents"]:
+# for url in ['https://finance.sina.com.cn/stock/roll/2023-06-26/doc-imyyrexk4053724.shtml']:
+# for url in ['https://s.weibo.com/top/summary?cate=realtimehot']:
+# for url in ['https://tophub.today/n/KqndgxeLl9', 'https://www.whatsonweibo.com/', 'https://www.trendingonweibo.com/?ref=producthunt', 'https://www.trendingonweibo.com/', 'https://www.statista.com/statistics/1377073/china-most-popular-news-on-weibo/']:
+# for url in ['https://www.usnews.com/news/entertainment/articles/2023-12-22/china-drafts-new-rules-proposing-restrictions-on-online-gaming']:
+# for url in ['https://developer.aliyun.com/article/721836']:
+# for url in ['https://cn.aliyun.com/page-source/price/detail/machinelearning_price']:
+# for url in ['https://mp.weixin.qq.com/s/Itad7Y-QBcr991JkF3SrIg']:
+# for url in ['https://zhidao.baidu.com/question/317577832.html']:
+# for url in ['https://www.cnn.com/2023/09/06/tech/huawei-mate-60-pro-phone/index.html']:
+# for url in ['https://www.reddit.com/r/China_irl/comments/15qojkh/46%E6%9C%88%E5%A4%96%E8%B5%84%E5%AF%B9%E4%B8%AD%E5%9B%BD%E7%9B%B4%E6%8E%A5%E6%8A%95%E8%B5%84%E5%87%8F87/', 'https://www.apple.com.cn/job-creation/Apple_China_CSR_Report_2020.pdf', 'https://hdr.undp.org/system/files/documents/hdr2013chpdf.pdf']:
+# for url in ['https://www.airuniversity.af.edu/JIPA/Display/Article/3111127/the-uschina-trade-war-vietnam-emerges-as-the-greatest-winner/']:
+# for url in ['https://zhuanlan.zhihu.com/p/646786536']:
+# for url in ['https://zh.wikipedia.org/wiki/%E4%BF%84%E7%BE%85%E6%96%AF%E5%85%A5%E4%BE%B5%E7%83%8F%E5%85%8B%E8%98%AD']:
+for url in ['https://raw.githubusercontent.com/yym68686/ChatGPT-Telegram-Bot/main/README.md']:
+# for url in ['https://raw.githubusercontent.com/openai/openai-python/main/src/openai/api_requestor.py']:
+# for url in ['https://stock.finance.sina.com.cn/usstock/quotes/aapl.html']:
+    # Web_crawler(url)
+    # print(get_body(url))
+    # print('-----------------------------')
+    # jina_ai_Web_crawler(url)
+    # print('-----------------------------')
+    # print(url_to_markdown(url))
+    # print('-----------------------------')
+    best_content = get_url_content(url)
+end_time = time.time()
+run_time = end_time - start_time
+# 打印运行时间
+print(f"程序运行时间:{run_time}秒")
beswarm/aient/test/test_aiwaves.py
@@ -0,0 +1,25 @@
+import os
+from aient.models import chatgpt
+
+API = os.environ.get('API', None)
+API_URL = os.environ.get('API_URL', None)
+GPT_ENGINE = os.environ.get('GPT_ENGINE', 'gpt-4o')
+
+systemprompt = (
+    "你是一位旅行规划专家。你需要帮助用户规划旅行行程,给出合理的行程安排。"
+    "- 如果用户提及要从一个城市前往另外一个城市,必须使用 get_Round_trip_flight_price 查询两个城市半年内往返机票价格信息。"
+    "- 在规划行程之前,必须使用 get_city_tarvel_info 查询城市的景点旅行攻略信息。"
+    "- 查询攻略后,你需要分析用户个性化需求。充分考虑用户的年龄,情侣,家庭,朋友,儿童,独自旅行等情况。排除不适合用户个性化需求的景点。之后输出符合用户需求的景点。"
+    "- 综合用户游玩时间,适合用户个性化需求的旅游城市景点,机票信息和预算,给出真实准确的旅游行程,包括游玩时长、景点之间的交通方式和移动距离,每天都要给出总的游玩时间。"
+    "- 根据查到的景点介绍结合你自己的知识,每个景点必须包含你推荐的理由和景点介绍。介绍景点用户游玩的景点,景点介绍尽量丰富精彩,吸引用户眼球,不要直接复述查到的景点介绍。"
+    "- 每个景点都要标注游玩时间、景点之间的交通方式和移动距离还有生动的景点介绍"
+    "- 尽量排满用户的行程,不要有太多空闲时间。"
+)
+bot = chatgpt(api_key=API, api_url=API_URL, engine=GPT_ENGINE, system_prompt=systemprompt)
+for text in bot.ask_stream("我在上海想去重庆旅游,我只有2000元预算,我想在重庆玩一周,你能帮我规划一下吗?"):
+# for text in bot.ask_stream("我在广州市,想周一去香港,周四早上回来,是去游玩,请你帮我规划整个行程。包括细节,如交通,住宿,餐饮,价格,等等,最好细节到每天各个部分的时间,花费,等等,尽量具体,用户一看就能直接执行的那种"):
+# for text in bot.ask_stream("上海有哪些好玩的地方?"):
+# for text in bot.ask_stream("just say test"):
+# for text in bot.ask_stream("我在上海想去重庆旅游,我只有2000元预算,我想在重庆玩一周,你能帮我规划一下吗?"):
+# for text in bot.ask_stream("我在上海想去重庆旅游,我有一天的时间。你能帮我规划一下吗?"):
+    print(text, end="")
beswarm/aient/test/test_aiwaves_arxiv.py
@@ -0,0 +1,19 @@
+import os
+from datetime import datetime
+
+from aient.models import chatgpt
+from aient.utils import prompt
+
+API = os.environ.get('API', None)
+API_URL = os.environ.get('API_URL', None)
+GPT_ENGINE = os.environ.get('GPT_ENGINE', 'gpt-4o')
+LANGUAGE = os.environ.get('LANGUAGE', 'Simplified Chinese')
+
+current_date = datetime.now()
+Current_Date = current_date.strftime("%Y-%m-%d")
+
+systemprompt = os.environ.get('SYSTEMPROMPT', prompt.system_prompt.format(LANGUAGE, Current_Date))
+
+bot = chatgpt(api_key=API, api_url=API_URL, engine=GPT_ENGINE, system_prompt=systemprompt)
+for text in bot.ask_stream("arXiv:2311.17132 讲了什么?"):
+    print(text, end="")
beswarm/aient/test/test_ask_gemini.py
@@ -0,0 +1,8 @@
+import os
+from aient.models import gemini
+
+GOOGLE_AI_API_KEY = os.environ.get('GOOGLE_AI_API_KEY', None)
+
+bot = gemini(api_key=GOOGLE_AI_API_KEY, engine='gemini-2.0-flash-exp')
+for text in bot.ask_stream("give me some example code of next.js to build a modern web site"):
+    print(text, end="")
beswarm/aient/test/test_class.py
@@ -0,0 +1,17 @@
+# return e
+def j(e, f):
+    e(f)
+    # return e
+class a:
+    def __init__(self) -> None:
+        self.b = [1, 2, 3]
+    def d(self, e):
+        e.append(4)
+    def c(self):
+        j(self.d, self.b)
+        return self.b
+
+k = a()
+print(k.b)
+print(k.c())
+print(k.b)
beswarm/aient/test/test_claude.py
@@ -0,0 +1,23 @@
+import os
+from datetime import datetime
+
+from aient.models import chatgpt, claude3
+from aient.utils import prompt
+
+API = os.environ.get('API', None)
+CLAUDE_API = os.environ.get('claude_api_key', None)
+API_URL = os.environ.get('API_URL', None)
+GPT_ENGINE = os.environ.get('GPT_ENGINE', 'gpt-4o')
+LANGUAGE = os.environ.get('LANGUAGE', 'Simplified Chinese')
+
+current_date = datetime.now()
+Current_Date = current_date.strftime("%Y-%m-%d")
+
+systemprompt = os.environ.get('SYSTEMPROMPT', prompt.system_prompt.format(LANGUAGE, Current_Date))
+
+# bot = chatgpt(api_key=API, api_url=API_URL, engine=GPT_ENGINE, system_prompt=systemprompt)
+bot = claude3(api_key=CLAUDE_API, engine=GPT_ENGINE, system_prompt=systemprompt)
+for text in bot.ask_stream("arXiv:2210.10716 这篇文章讲了啥"):
+# for text in bot.ask_stream("今天的微博热搜有哪些?"):
+# for text in bot.ask_stream("你现在是什么版本?"):
+    print(text, end="")
beswarm/aient/test/test_claude_zh_char.py
@@ -0,0 +1,26 @@
+def is_surrounded_by_chinese(text, index):
+    left_char = text[index - 1]
+    if 0 < index < len(text) - 1:
+        right_char = text[index + 1]
+        return '\u4e00' <= left_char <= '\u9fff' or '\u4e00' <= right_char <= '\u9fff'
+    if index == len(text) - 1:
+        return '\u4e00' <= left_char <= '\u9fff'
+    return False
+
+def replace_char(string, index, new_char):
+    return string[:index] + new_char + string[index+1:]
+
+def claude_replace(text):
+    Punctuation_mapping = {",": ",", ":": ":", "!": "!", "?": "?", ";": ";"}
+    key_list = list(Punctuation_mapping.keys())
+    for i in range(len(text)):
+        if is_surrounded_by_chinese(text, i) and (text[i] in key_list):
+            text = replace_char(text, i, Punctuation_mapping[text[i]])
+    return text
+
+text = '''
+你好!我是一名人工智能助手,很高兴见到你。有什么我可以帮助你的吗?无论是日常问题还是专业领域,我都会尽我所能为你解答。让我们开始愉快的交流吧!'''
+
+if __name__ == '__main__':
+    new_text = claude_replace(text)
+    print(new_text)
beswarm/aient/test/test_ddg_search.py
@@ -0,0 +1,50 @@
+from itertools import islice
+from duckduckgo_search import DDGS
+
+# def getddgsearchurl(query, max_results=4):
+#     try:
+#         webresult = DDGS().text(query, max_results=max_results)
+#         if webresult == None:
+#             return []
+#         urls = [result['href'] for result in webresult]
+#     except Exception as e:
+#         print('\033[31m')
+#         print("duckduckgo error", e)
+#         print('\033[0m')
+#         urls = []
+#     # print("ddg urls", urls)
+#     return urls
+
+def getddgsearchurl(query, max_results=4):
+    try:
+        results = []
+        with DDGS() as ddgs:
+            ddgs_gen = ddgs.text(query, safesearch='Off', timelimit='y', backend="lite")
+            for r in islice(ddgs_gen, max_results):
+                results.append(r)
+        urls = [result['href'] for result in results]
+    except Exception as e:
+        print('\033[31m')
+        print("duckduckgo error", e)
+        print('\033[0m')
+        urls = []
+    return urls
+
+def search_answers(keywords, max_results=4):
+    results = []
+    with DDGS() as ddgs:
+        # 使用DuckDuckGo搜索关键词
+        ddgs_gen = ddgs.answers(keywords)
+        # 从搜索结果中获取最大结果数
+        for r in islice(ddgs_gen, max_results):
+            results.append(r)
+
+    # 返回一个json响应,包含搜索结果
+    return {'results': results}
+
+
+if __name__ == '__main__':
+    # 搜索关键词
+    query = "OpenAI"
+    print(getddgsearchurl(query))
+    # print(search_answers(query))