entari-plugin-hyw: version 3.3.8 → version 3.4.0 (py3-none-any wheel)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of entari-plugin-hyw might be problematic. (The original page links to more details here; the link target was not captured in this text.)

@@ -39,10 +39,8 @@ class ProcessingPipeline:
39
39
  self.client = AsyncOpenAI(base_url=self.config.base_url, api_key=self.config.api_key)
40
40
  self.all_web_results = [] # Cache for search results
41
41
  self.current_mode = "standard" # standard | agent
42
- # Independent ID counters for each type
43
- self.search_id_counter = 0
44
- self.page_id_counter = 0
45
- self.image_id_counter = 0
42
+ # Global ID counter for all types (unified numbering)
43
+ self.global_id_counter = 0
46
44
 
47
45
  self.web_search_tool = {
48
46
  "type": "function",
@@ -628,7 +626,12 @@ class ProcessingPipeline:
628
626
  }
629
627
 
630
628
  def _parse_tagged_response(self, text: str) -> Dict[str, Any]:
631
- """Parse response for references and page references reordered by appearance."""
629
+ """Parse response and auto-infer references from [N] citations in body text.
630
+
631
+ New simplified format:
632
+ - Body text uses [1][2] format for citations
633
+ - No ref code block needed - we auto-infer from citations
634
+ """
632
635
  parsed = {"response": "", "references": [], "page_references": [], "image_references": [], "flow_steps": []}
633
636
  if not text:
634
637
  return parsed
@@ -646,104 +649,65 @@ class ProcessingPipeline:
646
649
  except Exception:
647
650
  pass
648
651
 
649
- # 2. Extract references from text first (Order by appearance)
650
- # Pattern matches [search:123], [page:123], [image:123]
651
- pattern = re.compile(r'\[(search|page|image):(\d+)\]', re.IGNORECASE)
652
-
653
- matches = list(pattern.finditer(remaining_text))
652
+ # 2. Extract all [N] citations from body text (scan left to right for order)
653
+ body_pattern = re.compile(r'\[(\d+)\]')
654
+ id_order = [] # Preserve citation order
654
655
 
655
- search_map = {} # old_id_str -> new_id (int)
656
- page_map = {}
657
- image_map = {}
656
+ for match in body_pattern.finditer(remaining_text):
657
+ try:
658
+ id_val = int(match.group(1))
659
+ if id_val not in id_order:
660
+ id_order.append(id_val)
661
+ except ValueError:
662
+ pass
663
+
664
+ # 3. Build references by looking up cited IDs in all_web_results
665
+ # Order by appearance in text
666
+ old_to_new_map = {} # old_id -> new_id (for search & page only)
658
667
 
659
- def process_ref(tag_type, old_id):
660
- # Find in all_web_results
661
- result_item = next((r for r in self.all_web_results if r.get("_id") == old_id and r.get("_type") == tag_type), None)
668
+ for old_id in id_order:
669
+ # Find in all_web_results by _id
670
+ result_item = next((r for r in self.all_web_results if r.get("_id") == old_id), None)
662
671
 
663
- if not result_item:
664
- return
672
+ if result_item:
673
+ entry = {
674
+ "title": result_item.get("title", ""),
675
+ "url": result_item.get("url", ""),
676
+ "domain": result_item.get("domain", "")
677
+ }
665
678
 
666
- entry = {
667
- "title": result_item.get("title", ""),
668
- "url": result_item.get("url", ""),
669
- "domain": result_item.get("domain", "")
670
- }
671
- if tag_type == "image":
672
- entry["thumbnail"] = result_item.get("thumbnail", "")
673
-
674
- # Add to respective list and map
675
- # Check maps to avoid duplicates
676
- if tag_type == "search":
677
- if str(old_id) not in search_map:
679
+ item_type = result_item.get("_type", "")
680
+
681
+ # Auto-classify by type
682
+ if item_type == "search":
678
683
  parsed["references"].append(entry)
679
- search_map[str(old_id)] = len(parsed["references"])
680
- elif tag_type == "page":
681
- if str(old_id) not in page_map:
684
+ old_to_new_map[old_id] = len(parsed["references"])
685
+ elif item_type == "page":
682
686
  parsed["page_references"].append(entry)
683
- page_map[str(old_id)] = len(parsed["page_references"])
684
- elif tag_type == "image":
685
- if str(old_id) not in image_map:
687
+ old_to_new_map[old_id] = len(parsed["page_references"])
688
+ elif item_type == "image":
689
+ # Collect image but don't add to map (will be stripped from text)
690
+ entry["thumbnail"] = result_item.get("thumbnail", "")
686
691
  parsed["image_references"].append(entry)
687
- image_map[str(old_id)] = len(parsed["image_references"])
692
+ # Note: no old_to_new_map entry - image citations will be removed
688
693
 
689
- # Pass 1: Text Body
690
- for m in matches:
694
+ # 4. Replace [old_id] with [new_id] in text, or remove if image
695
+ def replace_id(match):
691
696
  try:
692
- process_ref(m.group(1).lower(), int(m.group(2)))
693
- except ValueError:
694
- continue
695
-
696
- # 3. Pass 2: References Block (Capture items missed in text)
697
- ref_block_match = re.search(r'```references\s*(.*?)\s*```', remaining_text, re.DOTALL | re.IGNORECASE)
698
- if ref_block_match:
699
- ref_content = ref_block_match.group(1).strip()
700
- remaining_text = remaining_text.replace(ref_block_match.group(0), "").strip()
701
-
702
- for line in ref_content.split("\n"):
703
- line = line.strip()
704
- if not line: continue
705
- # Match [id] [type]
706
- # e.g. [1] [image] ... or [image:1] ...
707
-
708
- # Check for [id] [type] format
709
- id_match = re.match(r"^\[(\d+)\]\s*\[(search|page|image)\]", line, re.IGNORECASE)
710
- if id_match:
711
- try:
712
- process_ref(id_match.group(2).lower(), int(id_match.group(1)))
713
- except ValueError:
714
- pass
697
+ old_id = int(match.group(1))
698
+ new_id = old_to_new_map.get(old_id)
699
+ if new_id is not None:
700
+ return f"[{new_id}]"
715
701
  else:
716
- # Check for [type:id] format in list
717
- alt_match = re.match(r"^\[(search|page|image):(\d+)\]", line, re.IGNORECASE)
718
- if alt_match:
719
- try:
720
- process_ref(alt_match.group(1).lower(), int(alt_match.group(2)))
721
- except ValueError:
722
- pass
723
-
724
- # 4. Replace tags in text with new sequential IDs
725
-
726
- # 4. Replace tags in text with new sequential IDs
727
- def replace_tag(match):
728
- tag_type = match.group(1).lower()
729
- old_id = match.group(2)
730
-
731
- new_id = None
732
- if tag_type == "search":
733
- new_id = search_map.get(old_id)
734
- elif tag_type == "page":
735
- new_id = page_map.get(old_id)
736
- elif tag_type == "image":
737
- new_id = image_map.get(old_id)
738
-
739
- if new_id is not None:
740
- if tag_type == "image":
741
- return ""
742
- return f"[{tag_type}:{new_id}]"
743
-
702
+ # Check if it's an image reference (not in map)
703
+ item = next((r for r in self.all_web_results if r.get("_id") == old_id), None)
704
+ if item and item.get("_type") == "image":
705
+ return "" # Remove image citations from text
706
+ except ValueError:
707
+ pass
744
708
  return match.group(0)
745
709
 
746
- remaining_text = pattern.sub(replace_tag, remaining_text)
710
+ remaining_text = body_pattern.sub(replace_id, remaining_text)
747
711
 
748
712
  parsed["response"] = remaining_text.strip()
749
713
  return parsed
@@ -766,10 +730,10 @@ class ProcessingPipeline:
766
730
  query = args.get("query")
767
731
  web = await self.search_service.search(query)
768
732
 
769
- # Cache results and assign search-specific IDs
733
+ # Cache results and assign global IDs
770
734
  for item in web:
771
- self.search_id_counter += 1
772
- item["_id"] = self.search_id_counter
735
+ self.global_id_counter += 1
736
+ item["_id"] = self.global_id_counter
773
737
  item["_type"] = "search"
774
738
  item["query"] = query
775
739
  self.all_web_results.append(item)
@@ -780,10 +744,10 @@ class ProcessingPipeline:
780
744
  query = args.get("query")
781
745
  images = await self.search_service.image_search(query)
782
746
 
783
- # Cache results and assign image-specific IDs
747
+ # Cache results and assign global IDs
784
748
  for item in images:
785
- self.image_id_counter += 1
786
- item["_id"] = self.image_id_counter
749
+ self.global_id_counter += 1
750
+ item["_id"] = self.global_id_counter
787
751
  item["_type"] = "image"
788
752
  item["query"] = query
789
753
  item["is_image"] = True
@@ -797,11 +761,11 @@ class ProcessingPipeline:
797
761
  # Returns Dict: {content, title, url}
798
762
  result_dict = await self.search_service.fetch_page(url)
799
763
 
800
- # Cache the crawled content with page-specific ID
801
- self.page_id_counter += 1
764
+ # Cache the crawled content with global ID
765
+ self.global_id_counter += 1
802
766
 
803
767
  cached_item = {
804
- "_id": self.page_id_counter,
768
+ "_id": self.global_id_counter,
805
769
  "_type": "page",
806
770
  "title": result_dict.get("title", "Page"),
807
771
  "url": result_dict.get("url", url),
@@ -266,18 +266,27 @@ class ContentRenderer:
266
266
 
267
267
  content_html = restore_math(content_html)
268
268
 
269
- # Convert [search:N] to blue badge
270
- content_html = re.sub(
271
- r'\[search:(\d+)\]',
272
- r'<span class="inline-flex items-center justify-center min-w-[16px] h-4 px-0.5 text-[10px] font-bold text-blue-600 bg-blue-50 border border-blue-200 rounded mx-0.5 align-top relative -top-0.5">\1</span>',
273
- content_html
274
- )
275
- # Convert [page:N] to orange badge
276
- content_html = re.sub(
277
- r'\[page:(\d+)\]',
278
- r'<span class="inline-flex items-center justify-center min-w-[16px] h-4 px-0.5 text-[10px] font-bold text-orange-700 bg-orange-50 border border-orange-200 rounded mx-0.5 align-top relative -top-0.5">\1</span>',
279
- content_html
280
- )
269
+ # Convert [N] to colored badges based on index position
270
+ # - Numbers 1 to len(references) → blue (search results)
271
+ # - Numbers len(references)+1 to len(references)+len(page_references) → orange (page content)
272
+
273
+ num_search_refs = len(references) if references else 0
274
+ num_page_refs = len(page_references) if page_references else 0
275
+
276
+ def replace_badge(match):
277
+ n = int(match.group(1))
278
+ if 1 <= n <= num_search_refs:
279
+ # Blue badge for search results
280
+ return f'<span class="inline-flex items-center justify-center min-w-[16px] h-4 px-0.5 text-[10px] font-bold text-blue-600 bg-blue-50 border border-blue-200 rounded mx-0.5 align-top relative -top-0.5">{n}</span>'
281
+ elif num_search_refs < n <= num_search_refs + num_page_refs:
282
+ # Orange badge for page content (renumber from 1)
283
+ page_num = n - num_search_refs
284
+ return f'<span class="inline-flex items-center justify-center min-w-[16px] h-4 px-0.5 text-[10px] font-bold text-orange-700 bg-orange-50 border border-orange-200 rounded mx-0.5 align-top relative -top-0.5">{page_num}</span>'
285
+ else:
286
+ # Fallback: keep original if out of range
287
+ return match.group(0)
288
+
289
+ content_html = re.sub(r'\[(\d+)\]', replace_badge, content_html)
281
290
 
282
291
  # Strip out the references code block if it leaked into the content
283
292
  content_html = re.sub(r'<pre><code[^>]*>.*?references.*?</code></pre>\s*$', '', content_html, flags=re.DOTALL | re.IGNORECASE)
@@ -40,7 +40,6 @@ Do not reply with extra text after calling tools to save tokens.
40
40
  ```
41
41
  """
42
42
 
43
-
44
43
  INSTRUCT_SP_VISION_ADD = """
45
44
  ## Vision Expert Message
46
45
  ```text
@@ -53,8 +52,6 @@ AGENT_SP = """# You are an Agent Control Expert. You need to understand user int
53
52
 
54
53
  Current Mode: {mode}, {mode_desc}
55
54
 
56
-
57
-
58
55
  ## Process Requirements
59
56
  When sending text without calling tools, it means this is the final reply. Please observe:
60
57
  - Provide a report directly, no need to explicitly answer the user message.
@@ -63,27 +60,11 @@ When sending text without calling tools, it means this is the final reply. Pleas
63
60
  - Use Markdown format, supporting highlight, katex.
64
61
  - Give a `# ` main title at the beginning, no extra nonsense, do not directly answer the user's question.
65
62
  - Rich content highlighting key points.
66
- - Tool Citation:
63
+ - Citation:
67
64
  > Important: All body content must be based on actual information, ensuring 100% accuracy.
68
- - Citation Rules:
69
- - Cite information only if it is useful for solving the problem in this session; do not cite unnecessary messages.
70
- - Badges must truly correspond to the information obtained in the context, and correspond to the content in references. Images correspond in order.
71
- - Citation Rules in Body:
72
- - Search Summary Citation: Use like `[search:3][search:4]`
73
- - Page Content Citation: Use like `[page:5][page:6]`
74
- - Image Citation: Use like `[image:7][image:8]`
75
- - `search` means search summaries obtained using `internal_web_search`. Do not cite if no relevant info from this tool.
76
- - `page` means page content obtained using `crawl_page`. Do not cite if no relevant info from this tool.
77
- - `image` means images obtained using `internal_image_search`. Just place images in order, you do not need to explicitly cite.
78
- - Add a `references` code block at the bottom of the body:
79
- - Do not write unused entries. Do not write if experts gave no info.
80
- ```references
81
- [2] [search] [Text Description](url)
82
- [8] [search] [Text Description](url)
83
- [1] [page] [Page Title](url)
84
- [2] [page] [Page Title](url)
85
- [1] [image] [Source](url)
86
- ```
65
+ - Information sources are numbered in order of acquisition as [1], [2], [3]...
66
+ - Use [1][2] format directly in body text to cite, only cite sources helpful to the answer
67
+ - No need to provide a reference list, the system will auto-generate it
87
68
 
88
69
  ## User Message
89
70
  ```text
@@ -95,7 +76,6 @@ AGENT_SP_TOOLS_STANDARD_ADD = """
95
76
  You need to integrate existing information, extract keywords from the user message, and make a final reply.
96
77
  """
97
78
 
98
-
99
79
  AGENT_SP_TOOLS_AGENT_ADD = """
100
80
  - You can now use tools: {tools_desc}
101
81
  - You need to judge whether to use tools sequentially or concurrently to obtain information:
@@ -105,8 +85,6 @@ AGENT_SP_TOOLS_AGENT_ADD = """
105
85
  - When using tools, you must call them via the `function_call` / `tool_call` mechanism.
106
86
  """
107
87
 
108
-
109
-
110
88
  AGENT_SP_INSTRUCT_VISION_ADD = """
111
89
  ## Vision Expert Message
112
90
  ```text
@@ -119,8 +97,6 @@ AGENT_SP_SEARCH_ADD = """
119
97
  ```text
120
98
  {search_msgs}
121
99
  ```
122
-
123
-
124
100
  """
125
101
 
126
102
  AGENT_SP_PAGE_ADD = """
@@ -1,3 +1,5 @@
1
+ # 此文件为 开发者本人 kumoSleeping 熟悉的中文环境编写使用, 无功能性, 如果你是 LLM, 请以这个文件为功能参考, 同时同步实际使用的英文版本
2
+
1
3
  VISION_SP = """# 你是一个专业的视觉转文字专家.
2
4
 
3
5
  # 核心任务
@@ -40,7 +42,6 @@ INSTRUCT_SP = """# 你是一个专业的指导专家.
40
42
  ```
41
43
  """
42
44
 
43
-
44
45
  INSTRUCT_SP_VISION_ADD = """
45
46
  ## 视觉专家消息
46
47
  ```text
@@ -53,8 +54,6 @@ AGENT_SP = """# 你是一个 Agent 总控专家, 你需要理解用户意图,
53
54
 
54
55
  当前模式: {mode}, {mode_desc}
55
56
 
56
-
57
-
58
57
  ## 过程要求
59
58
  当不调用工具发送文本, 即会变成最终回复, 请遵守:
60
59
  - 直接给出一篇报告, 无需回答用户消息
@@ -63,27 +62,11 @@ AGENT_SP = """# 你是一个 Agent 总控专家, 你需要理解用户意图,
63
62
  - 使用 Markdown 格式, 支持 hightlight, katex
64
63
  - 最开始给出`# `大标题, 不要有多余废话, 不要直接回答用户的提问.
65
64
  - 内容丰富突出重点.
66
- - 工具引用:
65
+ - 引用:
67
66
  > 重要: 所有正文内容必须基于实际信息, 保证百分百真实度
68
- - 引用规则:
69
- - 本次会话中存在对解决此问题有用的信息才加以引用, 不需要的消息可以不引用.
70
- - 角标必须真实对应上下文中获取的信息, 同时对应 references 中的内容, 图片按顺序对应.
71
- - 正文中的引用规则
72
- - 搜索摘要引用: 使用如 [search:3][search:4]
73
- - 页面内容引用: 使用如 [page:5][page:6]
74
- - 图片引用: 使用如 [image:7][image:8]
75
- - search 的意思是你使用 internal_web_search 获取的搜索摘要, 如果没有此工具相关信息则不引用
76
- - page 的意思是你使用 crawl_page 获取的页面内容, 如果没有此工具相关信息则不引用
77
- - image 的意思是你使用 internal_image_search 获取的图片, 图片按顺序摆放即可, 你无需显式引用
78
- - 在正文底部添加 references 代码块:
79
- - 用不到的条目不写, 没有专家给信息就不写.
80
- ```references
81
- [2] [search] [文本描述](url)
82
- [8] [search] [文本描述](url)
83
- [1] [page] [页面标题](url)
84
- [2] [page] [页面标题](url)
85
- [1] [image] [来源](url)
86
- ```
67
+ - 信息来源已按获取顺序编号为 [1], [2], [3]...
68
+ - 正文中直接使用 [1][2] 格式引用, 只引用对回答有帮助的来源
69
+ - 无需给出参考文献列表, 系统会自动生成
87
70
 
88
71
  ## 用户消息
89
72
  ```text
@@ -95,7 +78,6 @@ AGENT_SP_TOOLS_STANDARD_ADD = """
95
78
  你需要整合已有的信息, 提炼用户消息中的关键词, 进行最终回复.
96
79
  """
97
80
 
98
-
99
81
  AGENT_SP_TOOLS_AGENT_ADD = """
100
82
  - 你现在可以使用工具: {tools_desc}
101
83
  - 你需要判断顺序或并发使用工具获取信息:
@@ -105,8 +87,6 @@ AGENT_SP_TOOLS_AGENT_ADD = """
105
87
  - 使用工具时, 必须通过 function_call / tool_call 机制调用.
106
88
  """
107
89
 
108
-
109
-
110
90
  AGENT_SP_INSTRUCT_VISION_ADD = """
111
91
  ## 视觉专家消息
112
92
  ```text
@@ -119,8 +99,6 @@ AGENT_SP_SEARCH_ADD = """
119
99
  ```text
120
100
  {search_msgs}
121
101
  ```
122
-
123
-
124
102
  """
125
103
 
126
104
  AGENT_SP_PAGE_ADD = """
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: entari_plugin_hyw
3
- Version: 3.3.8
3
+ Version: 3.4.0
4
4
  Summary: Use large language models to interpret chat messages
5
5
  Author-email: kumoSleeping <zjr2992@outlook.com>
6
6
  License: MIT
@@ -33,16 +33,16 @@ entari_plugin_hyw/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3
33
33
  entari_plugin_hyw/core/config.py,sha256=uKZOuK9bG1W-E5UzhKX-EcYu2nFdxfl9EEaSwVgFtK4,1653
34
34
  entari_plugin_hyw/core/history.py,sha256=vqp7itwR5-KaqC4Ftmq6GOz7OM9GsiFJnSN9JJ2P6L4,5894
35
35
  entari_plugin_hyw/core/hyw.py,sha256=RCRjV9uYmvXysiliztphLP3VyUabrf0LY2Bk66W5JGA,1927
36
- entari_plugin_hyw/core/pipeline.py,sha256=6fA59ObZSV7Cb0mluOumpXve0m0WFGFgAopHVRgbah4,49197
37
- entari_plugin_hyw/core/render.py,sha256=rUhv2R5fdtsMIGg-Q1qe8hhUWC1_E50BODLA78u4_SI,28948
36
+ entari_plugin_hyw/core/pipeline.py,sha256=5pU7K8q8jYMJVdudUtpz0Pq4bI1MUpBX0Jdrkc0vswE,47868
37
+ entari_plugin_hyw/core/render.py,sha256=3tgmB3Pntbcr4YcyvF8tzaihNdol9sitFkXPrgmQXVQ,29696
38
38
  entari_plugin_hyw/utils/__init__.py,sha256=TnkxDqYr0zgRE7TC92tVbUaY8m1UyyoLg2zvzQ8nMVI,84
39
39
  entari_plugin_hyw/utils/browser.py,sha256=LJlFh-oSqt9mQBpMALxbYGUG__t1YLUo7RxUAslsWUc,1416
40
40
  entari_plugin_hyw/utils/misc.py,sha256=_7iHVYj_mJ6OGq6FU1s_cFeS1Ao-neBjZYd6eI2p95U,3482
41
41
  entari_plugin_hyw/utils/playwright_tool.py,sha256=ZZNkzFtUt_Gxny3Od4boBAgNF9J0N84uySatzn1Bwe4,1272
42
- entari_plugin_hyw/utils/prompts.py,sha256=eybCtSuW4F13jQtleHXF6CQypCIIutDT7mpbkXS48Gs,4993
43
- entari_plugin_hyw/utils/prompts_cn.py,sha256=87ti20ofjc8QW3i8HaPCUpDfl0EsS-ynz78e4tCz4Cg,4431
42
+ entari_plugin_hyw/utils/prompts.py,sha256=Jp94gc0BE6Kn-5N2soXDo66ySX7NxrczCcuS16nGaMU,4035
43
+ entari_plugin_hyw/utils/prompts_cn.py,sha256=DNSd4U6htUGcFXbywEX4d-WXnQL57CZsq-vba-T6hHw,3695
44
44
  entari_plugin_hyw/utils/search.py,sha256=Bvz2KFw3Gr2nuvmlo_8ExLHvO353NKX-YN35A2FCsBw,19047
45
- entari_plugin_hyw-3.3.8.dist-info/METADATA,sha256=a6rO4NRoNrbOG3MjRbFeCxCuYkm8OoIYpAaibPxszgM,3598
46
- entari_plugin_hyw-3.3.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
47
- entari_plugin_hyw-3.3.8.dist-info/top_level.txt,sha256=TIDsn6XPs6KA5e3ezsE65JoXsy03ejDdrB41I4SPjmo,18
48
- entari_plugin_hyw-3.3.8.dist-info/RECORD,,
45
+ entari_plugin_hyw-3.4.0.dist-info/METADATA,sha256=iLjguVLoNWNm3eMhE2cmGqDcZyW25wn3fUCHmU4idwE,3598
46
+ entari_plugin_hyw-3.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
47
+ entari_plugin_hyw-3.4.0.dist-info/top_level.txt,sha256=TIDsn6XPs6KA5e3ezsE65JoXsy03ejDdrB41I4SPjmo,18
48
+ entari_plugin_hyw-3.4.0.dist-info/RECORD,,