chatgpt-mirai-qq-bot-web-search 0.2.1__py3-none-any.whl → 0.2.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: chatgpt-mirai-qq-bot-web-search
3
- Version: 0.2.1
3
+ Version: 0.2.3
4
4
  Summary: WebSearch adapter for lss233/chatgpt-mirai-qq-bot
5
5
  Home-page: https://github.com/chuanSir123/web_search
6
6
  Author: chuanSir
@@ -18,7 +18,7 @@ Requires-Dist: playwright
18
18
  Requires-Dist: trafilatura
19
19
  Requires-Dist: lxml-html-clean
20
20
 
21
- # OneBot-adapter for ChatGPT-Mirai-QQ-Bot
21
+ # web_search for ChatGPT-Mirai-QQ-Bot
22
22
 
23
23
  本项目是 [ChatGPT-Mirai-QQ-Bot](https://github.com/lss233/chatgpt-mirai-qq-bot) 的一个插件,用于为ChatGPT-Mirai-QQ-Bot提供联网搜索(WebSearch)功能。
24
24
 
@@ -0,0 +1,11 @@
1
+ web_search/__init__.py,sha256=bpu1gpf9tq3mOAoaGMM-8S6GBw2GzRAG6ClHGlp-CVw,4607
2
+ web_search/blocks.py,sha256=NJqWOAp7X-WMM3c3Xn3qLb9IVV090W3MqELHZTAw8tk,8959
3
+ web_search/config.py,sha256=DhLiERBJR2V5Boglf7Aq9Rbc4vsvLIh67CrLDIPeqA0,398
4
+ web_search/web_searcher.py,sha256=d6sFvaMMcm0vXPDRjAVPAvUcrPN6fU7cxbxikFyE23w,20920
5
+ web_search/example/roleplayWithWebSearch.yaml,sha256=C-dGy3z8gcRcmxzurssP-kPRLqMf1TYR-nnNUaJjISE,7468
6
+ chatgpt_mirai_qq_bot_web_search-0.2.3.dist-info/LICENSE,sha256=ILBn-G3jdarm2w8oOrLmXeJNU3czuJvVhDLBASWdhM8,34522
7
+ chatgpt_mirai_qq_bot_web_search-0.2.3.dist-info/METADATA,sha256=vlaZCwGj4Rts9xD-zq7nabGU7MC0vDHDP4nqFmDKQec,1734
8
+ chatgpt_mirai_qq_bot_web_search-0.2.3.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
9
+ chatgpt_mirai_qq_bot_web_search-0.2.3.dist-info/entry_points.txt,sha256=o3kRDSdSmSdnCKlK6qS57aN0WpI4ab-Nxub2NwUrjf0,64
10
+ chatgpt_mirai_qq_bot_web_search-0.2.3.dist-info/top_level.txt,sha256=PoNm8MJYw_y8RTMaNlY0ePLoNHxVUAE2IHDuL5fFubI,11
11
+ chatgpt_mirai_qq_bot_web_search-0.2.3.dist-info/RECORD,,
web_search/__init__.py CHANGED
@@ -6,7 +6,7 @@ from .config import WebSearchConfig
6
6
  from .web_searcher import WebSearcher
7
7
  from dataclasses import dataclass
8
8
  from kirara_ai.workflow.core.block import BlockRegistry
9
- from .blocks import WebSearchBlock
9
+ from .blocks import WebSearchBlock,WebSearchByKeywordBlock, DouyinVideoSearchBlock
10
10
  from .blocks import AppendSystemPromptBlock
11
11
  from kirara_ai.ioc.inject import Inject
12
12
  from kirara_ai.ioc.container import DependencyContainer
@@ -30,6 +30,8 @@ class WebSearchPlugin(Plugin):
30
30
  # 注册Block
31
31
  try:
32
32
  self.block_registry.register("web_search", "search", WebSearchBlock)
33
+ self.block_registry.register("web_search_by_keyword", "search", WebSearchByKeywordBlock)
34
+ self.block_registry.register("douyin_video_search", "search", DouyinVideoSearchBlock)
33
35
  except Exception as e:
34
36
  logger.warning(f"WebSearchPlugin failed: {e}")
35
37
  try:
web_search/blocks.py CHANGED
@@ -6,6 +6,8 @@ from .config import WebSearchConfig
6
6
  from kirara_ai.llm.format.message import LLMChatMessage
7
7
  from kirara_ai.llm.format.response import LLMChatResponse
8
8
  from kirara_ai.ioc.container import DependencyContainer
9
+ import re
10
+ from kirara_ai.im.message import IMMessage
9
11
 
10
12
  def get_options_provider(container: DependencyContainer, block: Block) -> List[str]:
11
13
  return ["bing", "google", "baidu"]
@@ -166,3 +168,64 @@ class AppendSystemPromptBlock(Block):
166
168
 
167
169
  return {"messages": messages}
168
170
 
171
class DouyinVideoSearchBlock(Block):
    """Workflow block that searches Douyin for videos matching a keyword.

    Inputs:
        keyword: the search keyword.
        count: how many videos to fetch.

    Outputs:
        results: a text blob with the search results, an error message on
            failure, or an empty string when no keyword was supplied.
    """

    name = "douyin_video_search"
    description = "通过关键词搜索抖音视频"
    # Only declared here, never assigned by this class. execute() reads
    # self.container, so it is presumably injected by the workflow runtime
    # before execute() runs -- TODO confirm against the Block base class.
    container: DependencyContainer
    inputs = {
        "keyword": Input(name="keyword", label="搜索关键字", data_type=str, description="搜索关键词"),
        "count": Input(name="count", label="视频数量", data_type=int, description="需要获取的视频数量"),
    }

    outputs = {
        "results": Output(name="results", label="搜索结果", data_type=str, description="视频链接列表"),
    }

    def __init__(self, name: str = None, timeout: Optional[int] = 10, proxy: str = None):
        """Store the per-block settings; the searcher itself is created lazily.

        Args:
            name: block name forwarded to the base class.
            timeout: timeout (seconds) forwarded to the searcher.
            proxy: proxy setting forwarded to the searcher.
        """
        super().__init__(name)
        self.searcher = None
        self.config = WebSearchConfig()
        self.timeout = timeout
        self.proxy = proxy

    @staticmethod
    def _get_event_loop():
        """Return the current event loop, creating and installing one if needed."""
        try:
            return asyncio.get_event_loop()
        except RuntimeError:
            # No usable loop in this thread: create one and register it.
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)
            return loop

    def _ensure_searcher(self):
        """Create the WebSearcher synchronously on first use."""
        if not self.searcher:
            self.searcher = self._get_event_loop().run_until_complete(WebSearcher.create())

    def execute(self, **kwargs) -> Dict[str, Any]:
        """Run the Douyin search and return ``{"results": <text>}``.

        A missing or empty ``keyword`` yields an empty result. A missing
        ``count`` falls back to 5, the default of
        ``WebSearcher.search_douyin_videos``. Errors raised while searching
        are logged with their traceback and reported in the result text
        instead of propagating.
        """
        import logging

        keyword = kwargs.get("keyword")
        count = kwargs.get("count")
        if count is None:
            count = 5

        if not keyword:
            return {"results": ""}

        self._ensure_searcher()

        try:
            loop = self._get_event_loop()
            # The sender id is resolved inside the try block so that a
            # missing IMMessage is reported like any other search failure.
            sender = self.container.resolve(IMMessage).sender.user_id
            results = loop.run_until_complete(
                self.searcher.search_douyin_videos(
                    keyword=keyword,
                    count=count,
                    timeout=self.timeout,
                    proxy=self.proxy,
                    sender=sender,
                )
            )
            return {"results": f"\n以下是抖音视频搜索结果:\n{results}"}
        except Exception as e:
            logging.getLogger(__name__).exception(
                "Douyin video search failed for keyword %r", keyword
            )
            return {"results": f"搜索失败: {str(e)}"}
231
+
@@ -8,14 +8,27 @@ import subprocess
8
8
  import sys
9
9
  from kirara_ai.logger import get_logger
10
10
  import os
11
+ import re
12
+ import requests
13
+ import json
14
+ from kirara_ai.im.message import IMMessage
15
+ from kirara_ai.im.sender import ChatSender
16
+ import yaml
17
+ from datetime import datetime, date
11
18
 
12
19
  logger = get_logger("WebSearchPlugin")
13
-
20
+ user_videoIds = {}
14
21
  class WebSearcher:
22
+
15
23
  def __init__(self):
24
+
16
25
  self.playwright = None
17
26
  self.browser = None
18
27
  self.context = None
28
+ # 获取当前文件所在目录
29
+ current_dir = os.path.dirname(os.path.abspath(__file__))
30
+ self.video_ids_file = os.path.join(current_dir, "douyin_video_ids.yaml")
31
+ self.video_ids = self._load_video_ids()
19
32
  self.search_engines = {
20
33
  'bing': {
21
34
  'url': 'https://www.bing.com/search?q={}',
@@ -49,15 +62,17 @@ class WebSearcher:
49
62
  async def _ensure_initialized(self,proxy):
50
63
  """确保浏览器已初始化"""
51
64
  try:
65
+ if self.context:
66
+ return self.context
52
67
  self.playwright = await async_playwright().start()
53
68
 
54
69
  # 创建用户数据目录路径
55
- user_data_dir = os.path.join(os.path.expanduser("~"), ".playwright_user_data")
70
+ user_data_dir = os.path.join(os.path.expanduser("~"), ".playwright_user_data")+f'{random.randint(1, 1000000)}'
56
71
  os.makedirs(user_data_dir, exist_ok=True)
57
72
 
58
73
  # 合并所有选项到一个字典
59
74
  context_options = {
60
- 'headless': True,
75
+ 'headless': False,
61
76
  'chromium_sandbox': False,
62
77
  'slow_mo': 50, # 减慢操作速度,更像人类
63
78
  'args': [
@@ -323,3 +338,143 @@ class WebSearcher:
323
338
  await self.context.close()
324
339
  if self.playwright:
325
340
  await self.playwright.stop()
341
+
342
def _load_video_ids(self):
    """Load today's video-ID record from the YAML file.

    Returns:
        The ``video_ids`` mapping stored in ``self.video_ids_file`` when the
        file exists, parses to a mapping, carries today's date and holds a
        mapping under ``video_ids``. In every other case (missing file, empty
        or malformed content, another day's data) the file is overwritten
        with an empty record for today and an empty dict is returned.
        Any exception is logged and also results in an empty dict.
    """
    try:
        today = str(date.today())
        if os.path.exists(self.video_ids_file):
            with open(self.video_ids_file, 'r', encoding='utf-8') as f:
                data = yaml.safe_load(f)
            # The file may parse to None, a list or a scalar, and
            # 'video_ids' may be null; only a real mapping is usable because
            # callers index the result by sender.
            if isinstance(data, dict) and data.get('date') == today:
                stored = data.get('video_ids')
                if isinstance(stored, dict):
                    return stored

        # No usable record for today: start a fresh, empty one.
        empty_data = {
            'date': today,
            'video_ids': {}
        }
        with open(self.video_ids_file, 'w', encoding='utf-8') as f:
            yaml.safe_dump(empty_data, f, allow_unicode=True)
        return empty_data['video_ids']
    except Exception as e:
        logger.error(f"Failed to load video IDs: {e}")
        return {}
364
+
365
def _save_video_ids(self):
    """Write the in-memory video-ID record, stamped with today's date, to the YAML file.

    Failures are logged and not propagated.
    """
    try:
        payload = dict(date=str(date.today()), video_ids=self.video_ids)
        target = self.video_ids_file
        # Make sure the parent directory is there before opening the file.
        os.makedirs(os.path.dirname(target), exist_ok=True)
        # Mode 'w' replaces whatever the file held before.
        with open(target, 'w', encoding='utf-8') as fh:
            yaml.safe_dump(payload, fh, allow_unicode=True)
    except Exception as err:
        logger.error(f"Failed to save video IDs: {err}")
379
+
380
def _fetch_douyin_play_url(self, video_id: str, ttwid: str, timeout: int = 10):
    """Resolve the redirect target of one Douyin video's play address.

    Fetches the iesdouyin share page for ``video_id``, extracts the JSON
    assigned to ``window._ROUTER_DATA``, takes the first play address and
    requests it without following redirects.

    Args:
        video_id: numeric video id.
        ttwid: value sent as the ``ttwid`` cookie.
        timeout: per-request timeout in seconds.

    Returns:
        The ``Location`` header of the play-address response, or ``None``
        when the response carries no such header.

    Raises:
        Whatever ``requests`` or the page/JSON parsing raises; the caller is
        expected to treat any failure as "skip this video".
    """
    headers = {
        "referer": "https://www.douyin.com/",
        "user-agent": "Mozilla/5.0 (Linux; Android 12; 2210132C Build/SP1A.210812.016) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.104 Mobile Safari/537.36",
        "cookie": "ttwid=" + ttwid,
    }
    share_url = f"https://www.iesdouyin.com/share/video/{video_id}"
    # Characters that cannot be encoded as GBK are dropped from the page text.
    page_text = requests.get(share_url, headers=headers, timeout=timeout).text.encode('gbk', errors='ignore').decode('gbk')
    json_data = page_text.split("window._ROUTER_DATA = ")[1].split("</script>")[0]
    router_data = json.loads(json_data.encode('gbk', errors='ignore').decode('gbk'))
    play_url = router_data["loaderData"]["video_(id)/page"]["videoInfoRes"]["item_list"][0]["video"]["play_addr"]["url_list"][0]
    # Presumably selects the non-watermarked, higher-resolution variant -- TODO confirm.
    play_url = play_url.replace("playwm", "play").replace("720p", "1080p")
    response = requests.get(play_url, allow_redirects=False, timeout=timeout)
    return response.headers.get('Location')

async def search_douyin_videos(self, keyword: str, count: int = 5, timeout: int = 10, proxy: str = None, sender: str = None) -> str:
    """Search Douyin for videos and return their resolved URLs as text.

    Videos already recorded for ``sender`` in ``self.video_ids`` are skipped;
    each newly returned video is recorded and the record is saved right away.
    When the scroll budget is used up before ``count`` videos were found, the
    sender's history is cleared.

    Args:
        keyword: search keyword.
        count: maximum number of videos to return.
        timeout: seconds to wait for the result list and for each HTTP request.
        proxy: proxy setting forwarded to the browser initialisation.
        sender: id of the requesting user; required.

    Returns:
        A numbered list of video URLs, ``"未找到视频"`` when nothing was
        found, or an error message. Errors during the search are logged and
        reported in the returned text rather than raised.
    """
    # Validate first so a rejected call does not start the browser.
    if not sender:
        return "需要提供发送者ID"

    context = await self._ensure_initialized(proxy)
    page = None

    if sender not in self.video_ids:
        self.video_ids[sender] = []

    try:
        # Build the search URL.
        search_url = f'https://www.douyin.com/search/{urllib.parse.quote(keyword)}'
        page = await context.new_page()

        # Set the User-Agent.
        await page.set_extra_http_headers({
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
        })

        await page.goto(search_url, wait_until='domcontentloaded')

        # Wait until the markup contains the string 'waterfall_item_'.
        await page.wait_for_function('''
            () => document.documentElement.innerHTML.includes('waterfall_item_')
        ''', timeout=timeout * 1000)

        # Obtain a ttwid cookie value.
        # NOTE: requests is synchronous, so this call (and the per-video
        # requests below) block the event loop while in flight; the explicit
        # timeout bounds how long that can last.
        register_url = "https://ttwid.bytedance.com/ttwid/union/register/"
        register_payload = {"region": "cn", "aid": 1768, "needFid": "false", "service": "www.ixigua.com",
                            "migrate_info": {"ticket": "", "source": "node"}, "cbUrlProtocol": "https", "union": "true"}
        ttresponse = requests.post(register_url, json=register_payload, timeout=timeout)
        ttwid = ttresponse.cookies.get_dict()['ttwid']

        video_links = []
        max_scroll_attempts = 10  # maximum number of scroll attempts
        scroll_attempt = 0
        while len(video_links) < count and scroll_attempt < max_scroll_attempts:
            video_elements = await page.query_selector_all('div[id^="waterfall_item_"]')

            # Handle the videos currently present on the page.
            for element in video_elements:
                if len(video_links) >= count:
                    break

                div_id = await element.get_attribute('id')
                if not div_id:
                    continue
                video_id = div_id.replace('waterfall_item_', '')
                # Only numeric ids that this sender has not received yet.
                if not video_id.isdigit() or video_id in self.video_ids[sender]:
                    continue

                try:
                    new_url = self._fetch_douyin_play_url(video_id, ttwid, timeout)
                except Exception as e:
                    # Best effort: one broken video must not abort the search.
                    logger.debug(f"Skipping Douyin video {video_id}: {e}")
                    continue

                if not new_url:  # only keep videos that yielded a redirect target
                    continue
                logger.debug(new_url)
                video_links.append(f'[{len(video_links) + 1}] {new_url}')
                self.video_ids[sender].append(video_id)
                self._save_video_ids()  # persist after every added video

            # Not enough videos yet: scroll down to load more.
            if len(video_links) < count:
                await page.evaluate('window.scrollTo(0, document.body.scrollHeight)')
                await asyncio.sleep(2)  # give new content time to load
                scroll_attempt += 1
                if scroll_attempt >= max_scroll_attempts:
                    # Scroll budget exhausted: clear this sender's history.
                    self.video_ids[sender] = []
                    self._save_video_ids()

        return ("视频url地址:\n"+"\n".join(video_links)) if video_links else "未找到视频"

    except Exception as e:
        logger.error(f"抖音视频搜索失败 - 关键词: {keyword} - 错误: {e}", exc_info=True)
        return f"搜索失败: {str(e)}"
    finally:
        if page:
            try:
                await page.close()
            except Exception as e:
                logger.error(f"关闭页面错误: {e}")
@@ -1,11 +0,0 @@
1
- web_search/__init__.py,sha256=zVZLb5A-im5XETwohgxyE-UCxjSvYl6I2OC3LnEQhdQ,4360
2
- web_search/blocks.py,sha256=S3RsV9CCTKAsKUNhewg__ejEpJRDz7DTawtH05WRgE8,6732
3
- web_search/config.py,sha256=DhLiERBJR2V5Boglf7Aq9Rbc4vsvLIh67CrLDIPeqA0,398
4
- web_search/web_searcher.py,sha256=0zLgMsWCK71gStyWpFjup5WfxHx3tBTf3rGwM7Ae7Zs,13332
5
- web_search/example/roleplayWithWebSearch.yaml,sha256=C-dGy3z8gcRcmxzurssP-kPRLqMf1TYR-nnNUaJjISE,7468
6
- chatgpt_mirai_qq_bot_web_search-0.2.1.dist-info/LICENSE,sha256=ILBn-G3jdarm2w8oOrLmXeJNU3czuJvVhDLBASWdhM8,34522
7
- chatgpt_mirai_qq_bot_web_search-0.2.1.dist-info/METADATA,sha256=Gt59c1F8TCJFClQ0qqdMvrCQ2gpeHHcIVH9cbVli-zw,1738
8
- chatgpt_mirai_qq_bot_web_search-0.2.1.dist-info/WHEEL,sha256=tZoeGjtWxWRfdplE7E3d45VPlLNQnvbKiYnx7gwAy8A,92
9
- chatgpt_mirai_qq_bot_web_search-0.2.1.dist-info/entry_points.txt,sha256=o3kRDSdSmSdnCKlK6qS57aN0WpI4ab-Nxub2NwUrjf0,64
10
- chatgpt_mirai_qq_bot_web_search-0.2.1.dist-info/top_level.txt,sha256=PoNm8MJYw_y8RTMaNlY0ePLoNHxVUAE2IHDuL5fFubI,11
11
- chatgpt_mirai_qq_bot_web_search-0.2.1.dist-info/RECORD,,