PyPI - entari-plugin-hyw - Versions diffs - 3.3.8__py3-none-any.whl → 3.4.0__py3-none-any.whl - Mend

entari-plugin-hyw 3.3.8__py3-none-any.whl → 3.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of entari-plugin-hyw might be problematic. Click here for more details.

Files changed (8)

entari_plugin_hyw/core/pipeline.py CHANGED Viewed

@@ -39,10 +39,8 @@ class ProcessingPipeline:
         self.client = AsyncOpenAI(base_url=self.config.base_url, api_key=self.config.api_key)
         self.all_web_results = [] # Cache for search results
         self.current_mode = "standard"  # standard | agent
-        # Independent ID counters for each type
-        self.search_id_counter = 0
-        self.page_id_counter = 0
-        self.image_id_counter = 0
+        # Global ID counter for all types (unified numbering)
+        self.global_id_counter = 0
         self.web_search_tool = {
             "type": "function",
@@ -628,7 +626,12 @@ class ProcessingPipeline:
             }
     def _parse_tagged_response(self, text: str) -> Dict[str, Any]:
-        """Parse response for references and page references reordered by appearance."""
+        """Parse response and auto-infer references from [N] citations in body text.
+        New simplified format:
+        - Body text uses [1][2] format for citations
+        - No ref code block needed - we auto-infer from citations
+        """
         parsed = {"response": "", "references": [], "page_references": [], "image_references": [], "flow_steps": []}
         if not text:
             return parsed
@@ -646,104 +649,65 @@ class ProcessingPipeline:
         except Exception:
             pass
-        # 2. Extract references from text first (Order by appearance)
-        # Pattern matches [search:123], [page:123], [image:123]
-        pattern = re.compile(r'\[(search|page|image):(\d+)\]', re.IGNORECASE)
-        matches = list(pattern.finditer(remaining_text))
+        # 2. Extract all [N] citations from body text (scan left to right for order)
+        body_pattern = re.compile(r'\[(\d+)\]')
+        id_order = []  # Preserve citation order
-        search_map = {}  # old_id_str -> new_id (int)
-        page_map = {}
-        image_map = {}
+        for match in body_pattern.finditer(remaining_text):
+            try:
+                id_val = int(match.group(1))
+                if id_val not in id_order:
+                    id_order.append(id_val)
+            except ValueError:
+                pass
+        # 3. Build references by looking up cited IDs in all_web_results
+        # Order by appearance in text
+        old_to_new_map = {}  # old_id -> new_id (for search & page only)
-        def process_ref(tag_type, old_id):
-            # Find in all_web_results
-            result_item = next((r for r in self.all_web_results if r.get("_id") == old_id and r.get("_type") == tag_type), None)
+        for old_id in id_order:
+            # Find in all_web_results by _id
+            result_item = next((r for r in self.all_web_results if r.get("_id") == old_id), None)
-            if not result_item:
-                return
+            if result_item:
+                entry = {
+                    "title": result_item.get("title", ""),
+                    "url": result_item.get("url", ""),
+                    "domain": result_item.get("domain", "")
+                }
-            entry = {
-                "title": result_item.get("title", ""),
-                "url": result_item.get("url", ""),
-                "domain": result_item.get("domain", "")
-            }
-            if tag_type == "image":
-                 entry["thumbnail"] = result_item.get("thumbnail", "")
-            # Add to respective list and map
-            # Check maps to avoid duplicates
-            if tag_type == "search":
-                if str(old_id) not in search_map:
+                item_type = result_item.get("_type", "")
+                # Auto-classify by type
+                if item_type == "search":
                     parsed["references"].append(entry)
-                    search_map[str(old_id)] = len(parsed["references"])
-            elif tag_type == "page":
-                if str(old_id) not in page_map:
+                    old_to_new_map[old_id] = len(parsed["references"])
+                elif item_type == "page":
                     parsed["page_references"].append(entry)
-                    page_map[str(old_id)] = len(parsed["page_references"])
-            elif tag_type == "image":
-                if str(old_id) not in image_map:
+                    old_to_new_map[old_id] = len(parsed["page_references"])
+                elif item_type == "image":
+                    # Collect image but don't add to map (will be stripped from text)
+                    entry["thumbnail"] = result_item.get("thumbnail", "")
                     parsed["image_references"].append(entry)
-                    image_map[str(old_id)] = len(parsed["image_references"])
+                    # Note: no old_to_new_map entry - image citations will be removed
-        # Pass 1: Text Body
-        for m in matches:
+        # 4. Replace [old_id] with [new_id] in text, or remove if image
+        def replace_id(match):
             try:
-                process_ref(m.group(1).lower(), int(m.group(2)))
-            except ValueError:
-                continue
-        # 3. Pass 2: References Block (Capture items missed in text)
-        ref_block_match = re.search(r'```references\s*(.*?)\s*```', remaining_text, re.DOTALL | re.IGNORECASE)
-        if ref_block_match:
-            ref_content = ref_block_match.group(1).strip()
-            remaining_text = remaining_text.replace(ref_block_match.group(0), "").strip()
-            for line in ref_content.split("\n"):
-                line = line.strip()
-                if not line: continue
-                # Match [id] [type]
-                # e.g. [1] [image] ... or [image:1] ...
-                # Check for [id] [type] format
-                id_match = re.match(r"^\[(\d+)\]\s*\[(search|page|image)\]", line, re.IGNORECASE)
-                if id_match:
-                    try:
-                         process_ref(id_match.group(2).lower(), int(id_match.group(1)))
-                    except ValueError:
-                        pass
+                old_id = int(match.group(1))
+                new_id = old_to_new_map.get(old_id)
+                if new_id is not None:
+                    return f"[{new_id}]"
                 else:
-                    # Check for [type:id] format in list
-                    alt_match = re.match(r"^\[(search|page|image):(\d+)\]", line, re.IGNORECASE)
-                    if alt_match:
-                        try:
-                            process_ref(alt_match.group(1).lower(), int(alt_match.group(2)))
-                        except ValueError:
-                            pass
-        # 4. Replace tags in text with new sequential IDs
-        # 4. Replace tags in text with new sequential IDs
-        def replace_tag(match):
-            tag_type = match.group(1).lower()
-            old_id = match.group(2)
-            new_id = None
-            if tag_type == "search":
-                new_id = search_map.get(old_id)
-            elif tag_type == "page":
-                new_id = page_map.get(old_id)
-            elif tag_type == "image":
-                new_id = image_map.get(old_id)
-            if new_id is not None:
-                if tag_type == "image":
-                    return ""
-                return f"[{tag_type}:{new_id}]"
+                    # Check if it's an image reference (not in map)
+                    item = next((r for r in self.all_web_results if r.get("_id") == old_id), None)
+                    if item and item.get("_type") == "image":
+                        return ""  # Remove image citations from text
+            except ValueError:
+                pass
             return match.group(0)
-        remaining_text = pattern.sub(replace_tag, remaining_text)
+        remaining_text = body_pattern.sub(replace_id, remaining_text)
         parsed["response"] = remaining_text.strip()
         return parsed
@@ -766,10 +730,10 @@ class ProcessingPipeline:
             query = args.get("query")
             web = await self.search_service.search(query)
-            # Cache results and assign search-specific IDs
+            # Cache results and assign global IDs
             for item in web:
-                self.search_id_counter += 1
-                item["_id"] = self.search_id_counter
+                self.global_id_counter += 1
+                item["_id"] = self.global_id_counter
                 item["_type"] = "search"
                 item["query"] = query
                 self.all_web_results.append(item)
@@ -780,10 +744,10 @@ class ProcessingPipeline:
             query = args.get("query")
             images = await self.search_service.image_search(query)
-            # Cache results and assign image-specific IDs
+            # Cache results and assign global IDs
             for item in images:
-                self.image_id_counter += 1
-                item["_id"] = self.image_id_counter
+                self.global_id_counter += 1
+                item["_id"] = self.global_id_counter
                 item["_type"] = "image"
                 item["query"] = query
                 item["is_image"] = True
@@ -797,11 +761,11 @@ class ProcessingPipeline:
             # Returns Dict: {content, title, url}
             result_dict = await self.search_service.fetch_page(url)
-            # Cache the crawled content with page-specific ID
-            self.page_id_counter += 1
+            # Cache the crawled content with global ID
+            self.global_id_counter += 1
             cached_item = {
-                "_id": self.page_id_counter,
+                "_id": self.global_id_counter,
                 "_type": "page",
                 "title": result_dict.get("title", "Page"),
                 "url": result_dict.get("url", url),

entari_plugin_hyw/core/render.py CHANGED Viewed

@@ -266,18 +266,27 @@ class ContentRenderer:
                 content_html = restore_math(content_html)
-                # Convert [search:N] to blue badge
-                content_html = re.sub(
-                    r'\[search:(\d+)\]',
-                    r'<span class="inline-flex items-center justify-center min-w-[16px] h-4 px-0.5 text-[10px] font-bold text-blue-600 bg-blue-50 border border-blue-200 rounded mx-0.5 align-top relative -top-0.5">\1</span>',
-                    content_html
-                )
-                # Convert [page:N] to orange badge
-                content_html = re.sub(
-                    r'\[page:(\d+)\]',
-                    r'<span class="inline-flex items-center justify-center min-w-[16px] h-4 px-0.5 text-[10px] font-bold text-orange-700 bg-orange-50 border border-orange-200 rounded mx-0.5 align-top relative -top-0.5">\1</span>',
-                    content_html
-                )
+                # Convert [N] to colored badges based on index position
+                # - Numbers 1 to len(references) → blue (search results)
+                # - Numbers len(references)+1 to len(references)+len(page_references) → orange (page content)
+                num_search_refs = len(references) if references else 0
+                num_page_refs = len(page_references) if page_references else 0
+                def replace_badge(match):
+                    n = int(match.group(1))
+                    if 1 <= n <= num_search_refs:
+                        # Blue badge for search results
+                        return f'<span class="inline-flex items-center justify-center min-w-[16px] h-4 px-0.5 text-[10px] font-bold text-blue-600 bg-blue-50 border border-blue-200 rounded mx-0.5 align-top relative -top-0.5">{n}</span>'
+                    elif num_search_refs < n <= num_search_refs + num_page_refs:
+                        # Orange badge for page content (renumber from 1)
+                        page_num = n - num_search_refs
+                        return f'<span class="inline-flex items-center justify-center min-w-[16px] h-4 px-0.5 text-[10px] font-bold text-orange-700 bg-orange-50 border border-orange-200 rounded mx-0.5 align-top relative -top-0.5">{page_num}</span>'
+                    else:
+                        # Fallback: keep original if out of range
+                        return match.group(0)
+                content_html = re.sub(r'\[(\d+)\]', replace_badge, content_html)
                 # Strip out the references code block if it leaked into the content
                 content_html = re.sub(r'<pre><code[^>]*>.*?references.*?</code></pre>\s*$', '', content_html, flags=re.DOTALL | re.IGNORECASE)

entari_plugin_hyw/utils/prompts.py CHANGED Viewed

@@ -40,7 +40,6 @@ Do not reply with extra text after calling tools to save tokens.
 ```
 """
 INSTRUCT_SP_VISION_ADD = """
 ## Vision Expert Message
 ```text
@@ -53,8 +52,6 @@ AGENT_SP = """# You are an Agent Control Expert. You need to understand user int
 Current Mode: {mode}, {mode_desc}
 ## Process Requirements
 When sending text without calling tools, it means this is the final reply. Please observe:
 - Provide a report directly, no need to explicitly answer the user message.
@@ -63,27 +60,11 @@ When sending text without calling tools, it means this is the final reply. Pleas
   - Use Markdown format, supporting highlight, katex.
   - Give a `# ` main title at the beginning, no extra nonsense, do not directly answer the user's question.
   - Rich content highlighting key points.
-- Tool Citation:
+- Citation:
   > Important: All body content must be based on actual information, ensuring 100% accuracy.
-  - Citation Rules:
-    - Cite information only if it is useful for solving the problem in this session; do not cite unnecessary messages.
-    - Badges must truly correspond to the information obtained in the context, and correspond to the content in references. Images correspond in order.
-  - Citation Rules in Body:
-    - Search Summary Citation: Use like `[search:3][search:4]`
-    - Page Content Citation: Use like `[page:5][page:6]`
-    - Image Citation: Use like `[image:7][image:8]`
-    - `search` means search summaries obtained using `internal_web_search`. Do not cite if no relevant info from this tool.
-    - `page` means page content obtained using `crawl_page`. Do not cite if no relevant info from this tool.
-    - `image` means images obtained using `internal_image_search`. Just place images in order, you do not need to explicitly cite.
-  - Add a `references` code block at the bottom of the body:
-    - Do not write unused entries. Do not write if experts gave no info.
-    ```references
-    [2] [search] [Text Description](url)
-    [8] [search] [Text Description](url)
-    [1] [page] [Page Title](url)
-    [2] [page] [Page Title](url)
-    [1] [image] [Source](url)
-    ```
+  - Information sources are numbered in order of acquisition as [1], [2], [3]...
+  - Use [1][2] format directly in body text to cite, only cite sources helpful to the answer
+  - No need to provide a reference list, the system will auto-generate it
 ## User Message
 ```text
@@ -95,7 +76,6 @@ AGENT_SP_TOOLS_STANDARD_ADD = """
 You need to integrate existing information, extract keywords from the user message, and make a final reply.
 """
 AGENT_SP_TOOLS_AGENT_ADD = """
 - You can now use tools: {tools_desc}
   - You need to judge whether to use tools sequentially or concurrently to obtain information:
@@ -105,8 +85,6 @@ AGENT_SP_TOOLS_AGENT_ADD = """
 - When using tools, you must call them via the `function_call` / `tool_call` mechanism.
 """
 AGENT_SP_INSTRUCT_VISION_ADD = """
 ## Vision Expert Message
 ```text
@@ -119,8 +97,6 @@ AGENT_SP_SEARCH_ADD = """
 ```text
 {search_msgs}
 ```
 """
 AGENT_SP_PAGE_ADD = """

entari_plugin_hyw/utils/prompts_cn.py CHANGED Viewed

@@ -1,3 +1,5 @@
+# 此文件为 开发者本人 kumoSleeping 熟悉的中文环境编写使用, 无功能性, 如果你是 LLM, 请以这个文件为功能参考, 同时同步实际使用的英文版本
 VISION_SP = """# 你是一个专业的视觉转文字专家.
 # 核心任务
@@ -40,7 +42,6 @@ INSTRUCT_SP = """# 你是一个专业的指导专家.
 ```
 """
 INSTRUCT_SP_VISION_ADD = """
 ## 视觉专家消息
 ```text
@@ -53,8 +54,6 @@ AGENT_SP = """# 你是一个 Agent 总控专家, 你需要理解用户意图,
 当前模式: {mode}, {mode_desc}
 ## 过程要求
 当不调用工具发送文本, 即会变成最终回复, 请遵守:
 - 直接给出一篇报告, 无需回答用户消息
@@ -63,27 +62,11 @@ AGENT_SP = """# 你是一个 Agent 总控专家, 你需要理解用户意图,
   - 使用 Markdown 格式, 支持 hightlight, katex
   - 最开始给出`# `大标题, 不要有多余废话, 不要直接回答用户的提问.
   - 内容丰富突出重点.
-- 工具引用:
+- 引用:
   > 重要: 所有正文内容必须基于实际信息, 保证百分百真实度
-  - 引用规则:
-    - 本次会话中存在对解决此问题有用的信息才加以引用, 不需要的消息可以不引用.
-    - 角标必须真实对应上下文中获取的信息, 同时对应 references 中的内容, 图片按顺序对应.
-  - 正文中的引用规则
-    - 搜索摘要引用: 使用如 [search:3][search:4]
-    - 页面内容引用: 使用如 [page:5][page:6]
-    - 图片引用: 使用如 [image:7][image:8]
-    - search 的意思是你使用 internal_web_search 获取的搜索摘要, 如果没有此工具相关信息则不引用
-    - page 的意思是你使用 crawl_page 获取的页面内容, 如果没有此工具相关信息则不引用
-    - image 的意思是你使用 internal_image_search 获取的图片, 图片按顺序摆放即可, 你无需显式引用
-  - 在正文底部添加 references 代码块:
-    - 用不到的条目不写, 没有专家给信息就不写.
-    ```references
-    [2] [search] [文本描述](url)
-    [8] [search] [文本描述](url)
-    [1] [page] [页面标题](url)
-    [2] [page] [页面标题](url)
-    [1] [image] [来源](url)
-    ```
+  - 信息来源已按获取顺序编号为 [1], [2], [3]...
+  - 正文中直接使用 [1][2] 格式引用, 只引用对回答有帮助的来源
+  - 无需给出参考文献列表, 系统会自动生成
 ## 用户消息
 ```text
@@ -95,7 +78,6 @@ AGENT_SP_TOOLS_STANDARD_ADD = """
 你需要整合已有的信息, 提炼用户消息中的关键词, 进行最终回复.
 """
 AGENT_SP_TOOLS_AGENT_ADD = """
 - 你现在可以使用工具: {tools_desc}
   - 你需要判断顺序或并发使用工具获取信息:
@@ -105,8 +87,6 @@ AGENT_SP_TOOLS_AGENT_ADD = """
 - 使用工具时, 必须通过 function_call / tool_call 机制调用.
 """
 AGENT_SP_INSTRUCT_VISION_ADD = """
 ## 视觉专家消息
 ```text
@@ -119,8 +99,6 @@ AGENT_SP_SEARCH_ADD = """
 ```text
 {search_msgs}
 ```
 """
 AGENT_SP_PAGE_ADD = """

{entari_plugin_hyw-3.3.8.dist-info → entari_plugin_hyw-3.4.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: entari_plugin_hyw
-Version: 3.3.8
+Version: 3.4.0
 Summary: Use large language models to interpret chat messages
 Author-email: kumoSleeping <zjr2992@outlook.com>
 License: MIT

{entari_plugin_hyw-3.3.8.dist-info → entari_plugin_hyw-3.4.0.dist-info}/RECORD RENAMED Viewed

@@ -33,16 +33,16 @@ entari_plugin_hyw/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3
 entari_plugin_hyw/core/config.py,sha256=uKZOuK9bG1W-E5UzhKX-EcYu2nFdxfl9EEaSwVgFtK4,1653
 entari_plugin_hyw/core/history.py,sha256=vqp7itwR5-KaqC4Ftmq6GOz7OM9GsiFJnSN9JJ2P6L4,5894
 entari_plugin_hyw/core/hyw.py,sha256=RCRjV9uYmvXysiliztphLP3VyUabrf0LY2Bk66W5JGA,1927
-entari_plugin_hyw/core/pipeline.py,sha256=6fA59ObZSV7Cb0mluOumpXve0m0WFGFgAopHVRgbah4,49197
-entari_plugin_hyw/core/render.py,sha256=rUhv2R5fdtsMIGg-Q1qe8hhUWC1_E50BODLA78u4_SI,28948
+entari_plugin_hyw/core/pipeline.py,sha256=5pU7K8q8jYMJVdudUtpz0Pq4bI1MUpBX0Jdrkc0vswE,47868
+entari_plugin_hyw/core/render.py,sha256=3tgmB3Pntbcr4YcyvF8tzaihNdol9sitFkXPrgmQXVQ,29696
 entari_plugin_hyw/utils/__init__.py,sha256=TnkxDqYr0zgRE7TC92tVbUaY8m1UyyoLg2zvzQ8nMVI,84
 entari_plugin_hyw/utils/browser.py,sha256=LJlFh-oSqt9mQBpMALxbYGUG__t1YLUo7RxUAslsWUc,1416
 entari_plugin_hyw/utils/misc.py,sha256=_7iHVYj_mJ6OGq6FU1s_cFeS1Ao-neBjZYd6eI2p95U,3482
 entari_plugin_hyw/utils/playwright_tool.py,sha256=ZZNkzFtUt_Gxny3Od4boBAgNF9J0N84uySatzn1Bwe4,1272
-entari_plugin_hyw/utils/prompts.py,sha256=eybCtSuW4F13jQtleHXF6CQypCIIutDT7mpbkXS48Gs,4993
-entari_plugin_hyw/utils/prompts_cn.py,sha256=87ti20ofjc8QW3i8HaPCUpDfl0EsS-ynz78e4tCz4Cg,4431
+entari_plugin_hyw/utils/prompts.py,sha256=Jp94gc0BE6Kn-5N2soXDo66ySX7NxrczCcuS16nGaMU,4035
+entari_plugin_hyw/utils/prompts_cn.py,sha256=DNSd4U6htUGcFXbywEX4d-WXnQL57CZsq-vba-T6hHw,3695
 entari_plugin_hyw/utils/search.py,sha256=Bvz2KFw3Gr2nuvmlo_8ExLHvO353NKX-YN35A2FCsBw,19047
-entari_plugin_hyw-3.3.8.dist-info/METADATA,sha256=a6rO4NRoNrbOG3MjRbFeCxCuYkm8OoIYpAaibPxszgM,3598
-entari_plugin_hyw-3.3.8.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-entari_plugin_hyw-3.3.8.dist-info/top_level.txt,sha256=TIDsn6XPs6KA5e3ezsE65JoXsy03ejDdrB41I4SPjmo,18
-entari_plugin_hyw-3.3.8.dist-info/RECORD,,
+entari_plugin_hyw-3.4.0.dist-info/METADATA,sha256=iLjguVLoNWNm3eMhE2cmGqDcZyW25wn3fUCHmU4idwE,3598
+entari_plugin_hyw-3.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+entari_plugin_hyw-3.4.0.dist-info/top_level.txt,sha256=TIDsn6XPs6KA5e3ezsE65JoXsy03ejDdrB41I4SPjmo,18
+entari_plugin_hyw-3.4.0.dist-info/RECORD,,

{entari_plugin_hyw-3.3.8.dist-info → entari_plugin_hyw-3.4.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{entari_plugin_hyw-3.3.8.dist-info → entari_plugin_hyw-3.4.0.dist-info}/top_level.txt RENAMED Viewed

File without changes

entari-plugin-hyw 3.3.8__py3-none-any.whl → 3.4.0__py3-none-any.whl

Potentially problematic release.

entari-plugin-hyw 3.3.8__py3-none-any.whl → 3.4.0__py3-none-any.whl