chatgpt-mirai-qq-bot-web-search 0.2.2__py3-none-any.whl → 0.2.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {chatgpt_mirai_qq_bot_web_search-0.2.2.dist-info → chatgpt_mirai_qq_bot_web_search-0.2.4.dist-info}/METADATA +2 -2
- chatgpt_mirai_qq_bot_web_search-0.2.4.dist-info/RECORD +11 -0
- web_search/__init__.py +2 -1
- web_search/blocks.py +63 -0
- web_search/web_searcher.py +157 -2
- chatgpt_mirai_qq_bot_web_search-0.2.2.dist-info/RECORD +0 -11
- {chatgpt_mirai_qq_bot_web_search-0.2.2.dist-info → chatgpt_mirai_qq_bot_web_search-0.2.4.dist-info}/LICENSE +0 -0
- {chatgpt_mirai_qq_bot_web_search-0.2.2.dist-info → chatgpt_mirai_qq_bot_web_search-0.2.4.dist-info}/WHEEL +0 -0
- {chatgpt_mirai_qq_bot_web_search-0.2.2.dist-info → chatgpt_mirai_qq_bot_web_search-0.2.4.dist-info}/entry_points.txt +0 -0
- {chatgpt_mirai_qq_bot_web_search-0.2.2.dist-info → chatgpt_mirai_qq_bot_web_search-0.2.4.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: chatgpt-mirai-qq-bot-web-search
|
3
|
-
Version: 0.2.2
|
3
|
+
Version: 0.2.4
|
4
4
|
Summary: WebSearch adapter for lss233/chatgpt-mirai-qq-bot
|
5
5
|
Home-page: https://github.com/chuanSir123/web_search
|
6
6
|
Author: chuanSir
|
@@ -18,7 +18,7 @@ Requires-Dist: playwright
|
|
18
18
|
Requires-Dist: trafilatura
|
19
19
|
Requires-Dist: lxml-html-clean
|
20
20
|
|
21
|
-
#
|
21
|
+
# web_search for ChatGPT-Mirai-QQ-Bot
|
22
22
|
|
23
23
|
本项目是 [ChatGPT-Mirai-QQ-Bot](https://github.com/lss233/chatgpt-mirai-qq-bot) 的一个插件,用于将OneBot协议的消息转换为ChatGPT-Mirai-QQ-Bot的消息格式。
|
24
24
|
|
@@ -0,0 +1,11 @@
|
|
1
|
+
web_search/__init__.py,sha256=bpu1gpf9tq3mOAoaGMM-8S6GBw2GzRAG6ClHGlp-CVw,4607
|
2
|
+
web_search/blocks.py,sha256=NJqWOAp7X-WMM3c3Xn3qLb9IVV090W3MqELHZTAw8tk,8959
|
3
|
+
web_search/config.py,sha256=DhLiERBJR2V5Boglf7Aq9Rbc4vsvLIh67CrLDIPeqA0,398
|
4
|
+
web_search/web_searcher.py,sha256=760WTBGusxYntigGi5Wsbm2rRHEWq3sTwfNlf3ew3b0,20919
|
5
|
+
web_search/example/roleplayWithWebSearch.yaml,sha256=C-dGy3z8gcRcmxzurssP-kPRLqMf1TYR-nnNUaJjISE,7468
|
6
|
+
chatgpt_mirai_qq_bot_web_search-0.2.4.dist-info/LICENSE,sha256=ILBn-G3jdarm2w8oOrLmXeJNU3czuJvVhDLBASWdhM8,34522
|
7
|
+
chatgpt_mirai_qq_bot_web_search-0.2.4.dist-info/METADATA,sha256=RaMgn6mSy-K2tEAvQMzmwDwO1rttpBHfrWqGP5VS9Yo,1734
|
8
|
+
chatgpt_mirai_qq_bot_web_search-0.2.4.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
9
|
+
chatgpt_mirai_qq_bot_web_search-0.2.4.dist-info/entry_points.txt,sha256=o3kRDSdSmSdnCKlK6qS57aN0WpI4ab-Nxub2NwUrjf0,64
|
10
|
+
chatgpt_mirai_qq_bot_web_search-0.2.4.dist-info/top_level.txt,sha256=PoNm8MJYw_y8RTMaNlY0ePLoNHxVUAE2IHDuL5fFubI,11
|
11
|
+
chatgpt_mirai_qq_bot_web_search-0.2.4.dist-info/RECORD,,
|
web_search/__init__.py
CHANGED
@@ -6,7 +6,7 @@ from .config import WebSearchConfig
|
|
6
6
|
from .web_searcher import WebSearcher
|
7
7
|
from dataclasses import dataclass
|
8
8
|
from kirara_ai.workflow.core.block import BlockRegistry
|
9
|
-
from .blocks import WebSearchBlock,WebSearchByKeywordBlock
|
9
|
+
from .blocks import WebSearchBlock,WebSearchByKeywordBlock, DouyinVideoSearchBlock
|
10
10
|
from .blocks import AppendSystemPromptBlock
|
11
11
|
from kirara_ai.ioc.inject import Inject
|
12
12
|
from kirara_ai.ioc.container import DependencyContainer
|
@@ -31,6 +31,7 @@ class WebSearchPlugin(Plugin):
|
|
31
31
|
try:
|
32
32
|
self.block_registry.register("web_search", "search", WebSearchBlock)
|
33
33
|
self.block_registry.register("web_search_by_keyword", "search", WebSearchByKeywordBlock)
|
34
|
+
self.block_registry.register("douyin_video_search", "search", DouyinVideoSearchBlock)
|
34
35
|
except Exception as e:
|
35
36
|
logger.warning(f"WebSearchPlugin failed: {e}")
|
36
37
|
try:
|
web_search/blocks.py
CHANGED
@@ -6,6 +6,8 @@ from .config import WebSearchConfig
|
|
6
6
|
from kirara_ai.llm.format.message import LLMChatMessage
|
7
7
|
from kirara_ai.llm.format.response import LLMChatResponse
|
8
8
|
from kirara_ai.ioc.container import DependencyContainer
|
9
|
+
import re
|
10
|
+
from kirara_ai.im.message import IMMessage
|
9
11
|
|
10
12
|
def get_options_provider(container: DependencyContainer, block: Block) -> List[str]:
    """Return the names of the selectable search engines.

    Neither ``container`` nor ``block`` is consulted; the list is fixed.
    """
    engines = ("bing", "google", "baidu")
    return list(engines)
|
@@ -166,3 +168,64 @@ class AppendSystemPromptBlock(Block):
|
|
166
168
|
|
167
169
|
return {"messages": messages}
|
168
170
|
|
171
|
+
class DouyinVideoSearchBlock(Block):
    """Workflow block that searches Douyin for videos matching a keyword.

    Inputs:
        keyword: search phrase; an empty or missing value yields an empty result.
        count: number of videos wanted; falls back to 5 (the searcher's own
            default) when the input is not supplied.

    Outputs:
        results: text holding either the search result or a failure message.
    """
    name = "douyin_video_search"
    description = "通过关键词搜索抖音视频"
    # NOTE(review): never assigned inside this class; presumably injected by
    # the workflow framework before execute() runs -- confirm.
    container: DependencyContainer
    inputs = {
        "keyword": Input(name="keyword", label="搜索关键字", data_type=str, description="搜索关键词"),
        "count": Input(name="count", label="视频数量", data_type=int, description="需要获取的视频数量")
    }

    outputs = {
        "results": Output(name="results", label="搜索结果", data_type=str, description="视频链接列表")
    }

    def __init__(self, name: str = None, timeout: Optional[int] = 10, proxy: str = None):
        """Store the search settings; the searcher itself is created lazily.

        Args:
            name: block name forwarded to the base class.
            timeout: timeout in seconds forwarded to the searcher.
            proxy: proxy setting forwarded to the searcher.
        """
        super().__init__(name)
        self.searcher = None
        self.config = WebSearchConfig()
        self.timeout = timeout
        self.proxy = proxy

    @staticmethod
    def _get_event_loop():
        """Return the current event loop, creating and installing one if none exists."""
        try:
            return asyncio.get_event_loop()
        except RuntimeError:
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            return loop

    def _ensure_searcher(self):
        """Create the WebSearcher synchronously on first use."""
        if not self.searcher:
            self.searcher = self._get_event_loop().run_until_complete(WebSearcher.create())

    def execute(self, **kwargs) -> Dict[str, Any]:
        """Run the Douyin search and return ``{"results": <text>}``.

        Any failure (searcher start-up, sender lookup, the search itself) is
        logged and reported through the ``results`` text rather than raised.
        """
        keyword = kwargs.get("keyword")
        count = kwargs.get("count")
        if count is None:
            # Mirror the default of WebSearcher.search_douyin_videos.
            count = 5

        if not keyword:
            return {"results": ""}

        try:
            # Inside the try so a start-up failure is reported like any other
            # search failure instead of escaping the block.
            self._ensure_searcher()
            sender = self.container.resolve(IMMessage).sender.user_id
            results = self._get_event_loop().run_until_complete(
                self.searcher.search_douyin_videos(
                    keyword=keyword,
                    count=count,
                    timeout=self.timeout,
                    proxy=self.proxy,
                    sender=sender,
                )
            )
            return {"results": f"\n以下是抖音视频搜索结果:\n{results}"}
        except Exception as e:
            import logging

            # Keep the traceback; a bare print() loses it.
            logging.getLogger(__name__).exception("Douyin video search failed: %s", e)
            return {"results": f"搜索失败: {str(e)}"}
|
231
|
+
|
web_search/web_searcher.py
CHANGED
@@ -8,14 +8,27 @@ import subprocess
|
|
8
8
|
import sys
|
9
9
|
from kirara_ai.logger import get_logger
|
10
10
|
import os
|
11
|
+
import re
|
12
|
+
import requests
|
13
|
+
import json
|
14
|
+
from kirara_ai.im.message import IMMessage
|
15
|
+
from kirara_ai.im.sender import ChatSender
|
16
|
+
import yaml
|
17
|
+
from datetime import datetime, date
|
11
18
|
|
12
19
|
logger = get_logger("WebSearchPlugin")
|
13
|
-
|
20
|
+
user_videoIds = {}
|
14
21
|
class WebSearcher:
|
22
|
+
|
15
23
|
def __init__(self):
|
24
|
+
|
16
25
|
self.playwright = None
|
17
26
|
self.browser = None
|
18
27
|
self.context = None
|
28
|
+
# 获取当前文件所在目录
|
29
|
+
current_dir = os.path.dirname(os.path.abspath(__file__))
|
30
|
+
self.video_ids_file = os.path.join(current_dir, "douyin_video_ids.yaml")
|
31
|
+
self.video_ids = self._load_video_ids()
|
19
32
|
self.search_engines = {
|
20
33
|
'bing': {
|
21
34
|
'url': 'https://www.bing.com/search?q={}',
|
@@ -49,10 +62,12 @@ class WebSearcher:
|
|
49
62
|
async def _ensure_initialized(self,proxy):
|
50
63
|
"""确保浏览器已初始化"""
|
51
64
|
try:
|
65
|
+
if self.context:
|
66
|
+
return self.context
|
52
67
|
self.playwright = await async_playwright().start()
|
53
68
|
|
54
69
|
# 创建用户数据目录路径
|
55
|
-
user_data_dir = os.path.join(os.path.expanduser("~"), ".playwright_user_data")
|
70
|
+
user_data_dir = os.path.join(os.path.expanduser("~"), ".playwright_user_data")+f'{random.randint(1, 1000000)}'
|
56
71
|
os.makedirs(user_data_dir, exist_ok=True)
|
57
72
|
|
58
73
|
# 合并所有选项到一个字典
|
@@ -323,3 +338,143 @@ class WebSearcher:
|
|
323
338
|
await self.context.close()
|
324
339
|
if self.playwright:
|
325
340
|
await self.playwright.stop()
|
341
|
+
|
342
|
+
def _load_video_ids(self):
    """Load today's record of already-returned video IDs from the YAML file.

    The file stores ``{'date': <str>, 'video_ids': {...}}``. The stored
    mapping is returned only when the file exists, its date equals today's
    date and ``video_ids`` really is a dict. In every other case (missing
    file, empty or malformed content, older date) the file is overwritten
    with an empty record for today and an empty dict is returned.

    Returns:
        dict: the stored ``video_ids`` mapping, or ``{}`` when nothing usable
        was stored or any error occurred (errors are logged, not raised).
    """
    try:
        today = str(date.today())
        if os.path.exists(self.video_ids_file):
            with open(self.video_ids_file, 'r', encoding='utf-8') as f:
                data = yaml.safe_load(f)
            # Guard the shape: a null or non-dict value would otherwise be
            # returned and later break `sender in self.video_ids` checks.
            if isinstance(data, dict) and data.get('date') == today:
                video_ids = data.get('video_ids')
                if isinstance(video_ids, dict):
                    return video_ids

        # Nothing usable for today: start a fresh record on disk.
        empty_data = {
            'date': today,
            'video_ids': {}
        }
        with open(self.video_ids_file, 'w', encoding='utf-8') as f:
            yaml.safe_dump(empty_data, f, allow_unicode=True)
        return empty_data['video_ids']
    except Exception as e:
        logger.error(f"Failed to load video IDs: {e}")
        return {}
|
364
|
+
|
365
|
+
def _save_video_ids(self):
    """Write the in-memory video-ID record, stamped with today's date, to the YAML file.

    The file is overwritten on every call. Any error is logged and
    swallowed, so the method never raises.
    """
    try:
        payload = dict(date=str(date.today()), video_ids=self.video_ids)
        target = self.video_ids_file
        # Create the parent directory first so the write cannot fail on a missing path.
        os.makedirs(os.path.dirname(target), exist_ok=True)
        with open(target, 'w', encoding='utf-8') as stream:
            yaml.safe_dump(payload, stream, allow_unicode=True)
    except Exception as err:
        logger.error(f"Failed to save video IDs: {err}")
|
379
|
+
|
380
|
+
async def search_douyin_videos(self, keyword: str, count: int = 5, timeout: int = 10, proxy: str = None, sender: str = None) -> str:
    """Search Douyin for ``keyword`` and return direct URLs of videos not yet returned to ``sender``.

    The search page is opened in the shared browser context, result tiles
    (``div`` elements whose id starts with ``waterfall_item_``) are read, and
    each new numeric video ID is resolved to a play URL. The page is scrolled
    up to 10 times to load more tiles. Every returned ID is recorded under
    ``sender`` and saved; if all scroll attempts are used up, the sender's
    record is cleared.

    Args:
        keyword: search phrase (URL-quoted into the search address).
        count: number of video URLs wanted.
        timeout: seconds to wait for the result tiles, also applied to each
            HTTP request made while resolving URLs.
        proxy: forwarded to ``_ensure_initialized``.
        sender: ID of the requesting user; required.

    Returns:
        A newline-separated, numbered list of URLs prefixed with
        "视频url地址:", or "未找到视频" when nothing was resolved, or
        "需要提供发送者ID" when ``sender`` is empty, or "搜索失败: ..." on error.
    """
    # Validate first so an unusable request never starts the browser.
    if not sender:
        return "需要提供发送者ID"

    context = await self._ensure_initialized(proxy)
    page = None

    if sender not in self.video_ids:
        self.video_ids[sender] = []

    try:
        search_url = f'https://www.douyin.com/search/{urllib.parse.quote(keyword)}'
        page = await context.new_page()

        await page.set_extra_http_headers({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        })

        await page.goto(search_url, wait_until='domcontentloaded')

        # Wait until at least one result tile marker appears in the markup.
        await page.wait_for_function('''
            () => document.documentElement.innerHTML.includes('waterfall_item_')
        ''', timeout=timeout * 1000)

        # `requests` is blocking; run it in the default executor so the event
        # loop (and the browser it drives) is not stalled.
        loop = asyncio.get_running_loop()
        ttwid = await loop.run_in_executor(None, self._fetch_douyin_ttwid, timeout)

        video_links = []
        max_scroll_attempts = 10  # upper bound on scroll-and-reload rounds
        scroll_attempt = 0
        while len(video_links) < count and scroll_attempt < max_scroll_attempts:
            video_elements = await page.query_selector_all('div[id^="waterfall_item_"]')

            for element in video_elements:
                if len(video_links) >= count:
                    break

                div_id = await element.get_attribute('id')
                video_id = div_id.replace('waterfall_item_', '')
                # Skip non-numeric tiles and videos already returned to this sender.
                if not video_id.isdigit() or video_id in self.video_ids[sender]:
                    continue

                try:
                    new_url = await loop.run_in_executor(
                        None, self._resolve_douyin_play_url, video_id, ttwid, timeout
                    )
                except Exception as e:
                    # One unresolvable video must not abort the whole search.
                    logger.debug(f"Failed to resolve douyin video {video_id}: {e}")
                    continue

                if new_url:  # only keep videos whose redirect target was obtained
                    logger.debug(new_url)
                    video_links.append(f'[{len(video_links) + 1}] {new_url}')
                    self.video_ids[sender].append(video_id)
                    self._save_video_ids()  # persist after every accepted video

            if len(video_links) < count:
                # Not enough yet: scroll to the bottom and give new tiles time to load.
                await page.evaluate('window.scrollTo(0, document.body.scrollHeight)')
                await asyncio.sleep(2)
                scroll_attempt += 1
                if scroll_attempt == max_scroll_attempts:
                    # Scroll budget used up: clear this sender's record.
                    self.video_ids[sender] = []
                    self._save_video_ids()

        return ("视频url地址:\n" + "\n".join(video_links)) if video_links else "未找到视频"

    except Exception as e:
        logger.error(f"抖音视频搜索失败 - 关键词: {keyword} - 错误: {e}", exc_info=True)
        return f"搜索失败: {str(e)}"
    finally:
        if page:
            try:
                await page.close()
            except Exception as e:
                logger.error(f"关闭页面错误: {e}")

@staticmethod
def _fetch_douyin_ttwid(timeout):
    """Register with the ttwid endpoint and return the ``ttwid`` cookie value (blocking)."""
    payload = {"region": "cn", "aid": 1768, "needFid": "false", "service": "www.ixigua.com",
               "migrate_info": {"ticket": "", "source": "node"}, "cbUrlProtocol": "https", "union": "true"}
    response = requests.post("https://ttwid.bytedance.com/ttwid/union/register/", json=payload, timeout=timeout)
    cookies = response.cookies.get_dict()
    if 'ttwid' not in cookies:
        raise RuntimeError("ttwid cookie missing from registration response")
    return cookies['ttwid']

@staticmethod
def _resolve_douyin_play_url(video_id, ttwid, timeout):
    """Resolve a numeric video ID to its redirected play URL (blocking).

    Returns the ``Location`` header of the play address, or ``None`` when the
    server did not answer with a redirect. Raises on network errors or when
    the share page does not have the expected structure.
    """
    headers = {
        "referer": "https://www.douyin.com/",
        "user-agent": "Mozilla/5.0 (Linux; Android 12; 2210132C Build/SP1A.210812.016) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.104 Mobile Safari/537.36",
        "cookie": "ttwid=" + ttwid
    }
    share_url = f"https://www.iesdouyin.com/share/video/{video_id}"
    html = requests.get(share_url, headers=headers, timeout=timeout).text
    # Drops characters that GBK cannot represent before parsing.
    # NOTE(review): purpose not evident from the code; kept to preserve behaviour.
    html = html.encode('gbk', errors='ignore').decode('gbk')
    router_json = html.split("window._ROUTER_DATA = ")[1].split("</script>")[0]
    router_data = json.loads(router_json)
    play_url = router_data["loaderData"]["video_(id)/page"]["videoInfoRes"]["item_list"][0]["video"]["play_addr"]["url_list"][0]
    play_url = play_url.replace("playwm", "play").replace("720p", "1080p")
    # Do not follow the redirect: its target is the URL we want to return.
    redirect = requests.get(play_url, allow_redirects=False, timeout=timeout)
    return redirect.headers.get('Location')
|
@@ -1,11 +0,0 @@
|
|
1
|
-
web_search/__init__.py,sha256=cDyiTyQ4i-7brk3qfQrPBF5XQNpDacWmauzJwTlK6VI,4485
|
2
|
-
web_search/blocks.py,sha256=S3RsV9CCTKAsKUNhewg__ejEpJRDz7DTawtH05WRgE8,6732
|
3
|
-
web_search/config.py,sha256=DhLiERBJR2V5Boglf7Aq9Rbc4vsvLIh67CrLDIPeqA0,398
|
4
|
-
web_search/web_searcher.py,sha256=0zLgMsWCK71gStyWpFjup5WfxHx3tBTf3rGwM7Ae7Zs,13332
|
5
|
-
web_search/example/roleplayWithWebSearch.yaml,sha256=C-dGy3z8gcRcmxzurssP-kPRLqMf1TYR-nnNUaJjISE,7468
|
6
|
-
chatgpt_mirai_qq_bot_web_search-0.2.2.dist-info/LICENSE,sha256=ILBn-G3jdarm2w8oOrLmXeJNU3czuJvVhDLBASWdhM8,34522
|
7
|
-
chatgpt_mirai_qq_bot_web_search-0.2.2.dist-info/METADATA,sha256=Skc-om3-q2TH-RUte06VDlqyOD4QC17hpqaS2UanRCM,1738
|
8
|
-
chatgpt_mirai_qq_bot_web_search-0.2.2.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
|
9
|
-
chatgpt_mirai_qq_bot_web_search-0.2.2.dist-info/entry_points.txt,sha256=o3kRDSdSmSdnCKlK6qS57aN0WpI4ab-Nxub2NwUrjf0,64
|
10
|
-
chatgpt_mirai_qq_bot_web_search-0.2.2.dist-info/top_level.txt,sha256=PoNm8MJYw_y8RTMaNlY0ePLoNHxVUAE2IHDuL5fFubI,11
|
11
|
-
chatgpt_mirai_qq_bot_web_search-0.2.2.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|
File without changes
|