entari-plugin-hyw 3.4.2__py3-none-any.whl → 3.5.0rc2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of entari-plugin-hyw might be problematic. Click here for more details.

Files changed (92)
  1. entari_plugin_hyw/__init__.py +78 -158
  2. entari_plugin_hyw/assets/card-dist/index.html +396 -0
  3. entari_plugin_hyw/assets/card-dist/logos/anthropic.svg +1 -0
  4. entari_plugin_hyw/assets/card-dist/logos/cerebras.svg +9 -0
  5. entari_plugin_hyw/assets/card-dist/logos/deepseek.png +0 -0
  6. entari_plugin_hyw/assets/card-dist/logos/gemini.svg +1 -0
  7. entari_plugin_hyw/assets/card-dist/logos/google.svg +1 -0
  8. entari_plugin_hyw/assets/card-dist/logos/grok.png +0 -0
  9. entari_plugin_hyw/assets/card-dist/logos/huggingface.png +0 -0
  10. entari_plugin_hyw/assets/card-dist/logos/microsoft.svg +15 -0
  11. entari_plugin_hyw/assets/card-dist/logos/minimax.png +0 -0
  12. entari_plugin_hyw/assets/card-dist/logos/mistral.png +0 -0
  13. entari_plugin_hyw/assets/card-dist/logos/nvida.png +0 -0
  14. entari_plugin_hyw/assets/card-dist/logos/openai.svg +1 -0
  15. entari_plugin_hyw/assets/card-dist/logos/openrouter.png +0 -0
  16. entari_plugin_hyw/assets/card-dist/logos/perplexity.svg +24 -0
  17. entari_plugin_hyw/assets/card-dist/logos/qwen.png +0 -0
  18. entari_plugin_hyw/assets/card-dist/logos/xai.png +0 -0
  19. entari_plugin_hyw/assets/card-dist/logos/xiaomi.png +0 -0
  20. entari_plugin_hyw/assets/card-dist/logos/zai.png +0 -0
  21. entari_plugin_hyw/assets/card-dist/vite.svg +1 -0
  22. entari_plugin_hyw/card-ui/.gitignore +24 -0
  23. entari_plugin_hyw/card-ui/README.md +5 -0
  24. entari_plugin_hyw/card-ui/index.html +16 -0
  25. entari_plugin_hyw/card-ui/package-lock.json +2342 -0
  26. entari_plugin_hyw/card-ui/package.json +31 -0
  27. entari_plugin_hyw/card-ui/public/logos/anthropic.svg +1 -0
  28. entari_plugin_hyw/card-ui/public/logos/cerebras.svg +9 -0
  29. entari_plugin_hyw/card-ui/public/logos/deepseek.png +0 -0
  30. entari_plugin_hyw/card-ui/public/logos/gemini.svg +1 -0
  31. entari_plugin_hyw/card-ui/public/logos/google.svg +1 -0
  32. entari_plugin_hyw/card-ui/public/logos/grok.png +0 -0
  33. entari_plugin_hyw/card-ui/public/logos/huggingface.png +0 -0
  34. entari_plugin_hyw/card-ui/public/logos/microsoft.svg +15 -0
  35. entari_plugin_hyw/card-ui/public/logos/minimax.png +0 -0
  36. entari_plugin_hyw/card-ui/public/logos/mistral.png +0 -0
  37. entari_plugin_hyw/card-ui/public/logos/nvida.png +0 -0
  38. entari_plugin_hyw/card-ui/public/logos/openai.svg +1 -0
  39. entari_plugin_hyw/card-ui/public/logos/openrouter.png +0 -0
  40. entari_plugin_hyw/card-ui/public/logos/perplexity.svg +24 -0
  41. entari_plugin_hyw/card-ui/public/logos/qwen.png +0 -0
  42. entari_plugin_hyw/card-ui/public/logos/xai.png +0 -0
  43. entari_plugin_hyw/card-ui/public/logos/xiaomi.png +0 -0
  44. entari_plugin_hyw/card-ui/public/logos/zai.png +0 -0
  45. entari_plugin_hyw/card-ui/public/vite.svg +1 -0
  46. entari_plugin_hyw/card-ui/src/App.vue +410 -0
  47. entari_plugin_hyw/card-ui/src/assets/vue.svg +1 -0
  48. entari_plugin_hyw/card-ui/src/components/HelloWorld.vue +41 -0
  49. entari_plugin_hyw/card-ui/src/components/MarkdownContent.vue +385 -0
  50. entari_plugin_hyw/card-ui/src/components/SectionCard.vue +41 -0
  51. entari_plugin_hyw/card-ui/src/components/StageCard.vue +183 -0
  52. entari_plugin_hyw/card-ui/src/main.ts +5 -0
  53. entari_plugin_hyw/card-ui/src/style.css +8 -0
  54. entari_plugin_hyw/card-ui/src/test_regex.js +103 -0
  55. entari_plugin_hyw/card-ui/src/types.ts +52 -0
  56. entari_plugin_hyw/card-ui/tsconfig.app.json +16 -0
  57. entari_plugin_hyw/card-ui/tsconfig.json +7 -0
  58. entari_plugin_hyw/card-ui/tsconfig.node.json +26 -0
  59. entari_plugin_hyw/card-ui/vite.config.ts +16 -0
  60. entari_plugin_hyw/{core/history.py → history.py} +25 -1
  61. entari_plugin_hyw/image_cache.py +283 -0
  62. entari_plugin_hyw/{utils/misc.py → misc.py} +0 -3
  63. entari_plugin_hyw/{core/pipeline.py → pipeline.py} +236 -86
  64. entari_plugin_hyw/{utils/prompts_cn.py → prompts.py} +10 -25
  65. entari_plugin_hyw/render_vue.py +314 -0
  66. entari_plugin_hyw/{utils/search.py → search.py} +227 -10
  67. {entari_plugin_hyw-3.4.2.dist-info → entari_plugin_hyw-3.5.0rc2.dist-info}/METADATA +5 -2
  68. entari_plugin_hyw-3.5.0rc2.dist-info/RECORD +88 -0
  69. entari_plugin_hyw/assets/libs/highlight.css +0 -10
  70. entari_plugin_hyw/assets/libs/highlight.js +0 -1213
  71. entari_plugin_hyw/assets/libs/katex-auto-render.js +0 -1
  72. entari_plugin_hyw/assets/libs/katex.css +0 -1
  73. entari_plugin_hyw/assets/libs/katex.js +0 -1
  74. entari_plugin_hyw/assets/libs/tailwind.css +0 -1
  75. entari_plugin_hyw/assets/package-lock.json +0 -953
  76. entari_plugin_hyw/assets/package.json +0 -16
  77. entari_plugin_hyw/assets/tailwind.config.js +0 -12
  78. entari_plugin_hyw/assets/tailwind.input.css +0 -235
  79. entari_plugin_hyw/assets/template.html +0 -157
  80. entari_plugin_hyw/assets/template.html.bak +0 -157
  81. entari_plugin_hyw/assets/template.j2 +0 -400
  82. entari_plugin_hyw/core/__init__.py +0 -0
  83. entari_plugin_hyw/core/config.py +0 -38
  84. entari_plugin_hyw/core/hyw.py +0 -48
  85. entari_plugin_hyw/core/render.py +0 -630
  86. entari_plugin_hyw/utils/__init__.py +0 -2
  87. entari_plugin_hyw/utils/browser.py +0 -40
  88. entari_plugin_hyw/utils/playwright_tool.py +0 -36
  89. entari_plugin_hyw/utils/prompts.py +0 -119
  90. entari_plugin_hyw-3.4.2.dist-info/RECORD +0 -49
  91. {entari_plugin_hyw-3.4.2.dist-info → entari_plugin_hyw-3.5.0rc2.dist-info}/WHEEL +0 -0
  92. {entari_plugin_hyw-3.4.2.dist-info → entari_plugin_hyw-3.5.0rc2.dist-info}/top_level.txt +0 -0
@@ -1,6 +1,7 @@
1
1
  import asyncio
2
2
  import html
3
3
  import json
4
+ import re
4
5
  import time
5
6
  from contextlib import asynccontextmanager
6
7
  from typing import Any, Dict, List, Optional, Tuple
@@ -8,16 +9,14 @@ from typing import Any, Dict, List, Optional, Tuple
8
9
  from loguru import logger
9
10
  from openai import AsyncOpenAI
10
11
 
11
- from .config import HYWConfig
12
- from ..utils.search import SearchService
13
- from ..utils.prompts import (
12
+ from .search import SearchService
13
+ from .image_cache import get_cached_images
14
+ from .prompts import (
14
15
  AGENT_SP,
15
16
  AGENT_SP_INSTRUCT_VISION_ADD,
16
17
  AGENT_SP_TOOLS_STANDARD_ADD,
17
18
  AGENT_SP_TOOLS_AGENT_ADD,
18
19
  AGENT_SP_SEARCH_ADD,
19
- AGENT_SP_PAGE_ADD,
20
- AGENT_SP_IMAGE_SEARCH_ADD,
21
20
  INSTRUCT_SP,
22
21
  INSTRUCT_SP_VISION_ADD,
23
22
  VISION_SP,
@@ -33,7 +32,7 @@ class ProcessingPipeline:
33
32
  Core pipeline (vision -> instruct/search -> agent).
34
33
  """
35
34
 
36
- def __init__(self, config: HYWConfig):
35
+ def __init__(self, config: Any):
37
36
  self.config = config
38
37
  self.search_service = SearchService(config)
39
38
  self.client = AsyncOpenAI(base_url=self.config.base_url, api_key=self.config.api_key)
@@ -120,11 +119,9 @@ class ProcessingPipeline:
120
119
  final_response_content = ""
121
120
  structured: Dict[str, Any] = {}
122
121
 
123
- # Reset search cache and ID counters for this execution
122
+ # Reset search cache and ID counter for this execution
124
123
  self.all_web_results = []
125
- self.search_id_counter = 0
126
- self.page_id_counter = 0
127
- self.image_id_counter = 0
124
+ self.global_id_counter = 0
128
125
 
129
126
  try:
130
127
  logger.info(f"Pipeline: Starting workflow for '{user_input}' using {active_model}")
@@ -188,7 +185,8 @@ class ProcessingPipeline:
188
185
  vision_text=vision_text,
189
186
  model=instruct_model,
190
187
  )
191
- instruct_time = time.time() - instruct_start
188
+ # Instruct time excludes search time (search_time is returned separately)
189
+ instruct_time = time.time() - instruct_start - search_time
192
190
 
193
191
  # Calculate Instruct Cost
194
192
  instruct_cost = 0.0
@@ -265,17 +263,18 @@ class ProcessingPipeline:
265
263
  if vision_text:
266
264
  system_prompt += AGENT_SP_INSTRUCT_VISION_ADD.format(vision_msgs=vision_text)
267
265
 
268
- # Append search results
269
- if has_search_results and search_msgs_text:
270
- system_prompt += AGENT_SP_SEARCH_ADD.format(search_msgs=search_msgs_text)
271
-
272
- # Append crawled page content
266
+ # Append all search results (text, page, image) in one block
273
267
  page_msgs_text = self._format_page_msgs()
268
+ all_search_parts = []
269
+ if has_search_results and search_msgs_text:
270
+ all_search_parts.append(search_msgs_text)
274
271
  if page_msgs_text:
275
- system_prompt += AGENT_SP_PAGE_ADD.format(page_msgs=page_msgs_text)
276
-
272
+ all_search_parts.append(page_msgs_text)
277
273
  if has_image_results and image_msgs_text:
278
- system_prompt += AGENT_SP_IMAGE_SEARCH_ADD.format(image_search_msgs=image_msgs_text)
274
+ all_search_parts.append(image_msgs_text)
275
+
276
+ if all_search_parts:
277
+ system_prompt += AGENT_SP_SEARCH_ADD.format(search_msgs="\n".join(all_search_parts))
279
278
 
280
279
  last_system_prompt = system_prompt
281
280
 
@@ -331,6 +330,7 @@ class ProcessingPipeline:
331
330
  "tool_results": [],
332
331
  "tool_time": tool_exec_time,
333
332
  "llm_time": step_llm_time,
333
+ "usage": step_usage,
334
334
  }
335
335
  for i, result in enumerate(results):
336
336
  tc = tool_calls[i]
@@ -428,7 +428,7 @@ class ProcessingPipeline:
428
428
  stages_used.append({
429
429
  "name": "Vision",
430
430
  "model": v_model,
431
- "icon_config": getattr(self.config, "vision_icon", None) or infer_icon(v_model, v_base_url),
431
+ "icon_config": infer_icon(v_model, v_base_url),
432
432
  "provider": infer_provider(v_base_url),
433
433
  "time": v.get("time", 0),
434
434
  "cost": v.get("cost", 0.0)
@@ -441,20 +441,33 @@ class ProcessingPipeline:
441
441
  stages_used.append({
442
442
  "name": "Instruct",
443
443
  "model": i_model,
444
- "icon_config": getattr(self.config, "instruct_icon", None) or infer_icon(i_model, i_base_url),
444
+ "icon_config": infer_icon(i_model, i_base_url),
445
445
  "provider": infer_provider(i_base_url),
446
446
  "time": i.get("time", 0),
447
447
  "cost": i.get("cost", 0.0)
448
448
  })
449
449
 
450
- if has_search_results and search_payloads:
450
+ # Show Search stage if we have ANY search results (text OR image)
451
+ if (has_search_results or has_image_results) and search_payloads:
452
+ # Collect initial search results for the Search stage card
453
+ initial_refs = [
454
+ {"title": r.get("title", ""), "url": r.get("url", ""), "domain": r.get("domain", "")}
455
+ for r in self.all_web_results if r.get("_type") == "search"
456
+ ]
457
+ initial_images = [
458
+ {"title": r.get("title", ""), "url": r.get("url", ""), "thumbnail": r.get("thumbnail", "")}
459
+ for r in self.all_web_results if r.get("_type") == "image"
460
+ ]
461
+
451
462
  stages_used.append({
452
463
  "name": "Search",
453
464
  "model": getattr(self.config, "search_name", "DuckDuckGo"),
454
465
  "icon_config": "search",
455
466
  "provider": getattr(self.config, 'search_provider', 'Crawl4AI'),
456
467
  "time": search_time,
457
- "cost": 0.0
468
+ "cost": 0.0,
469
+ "references": initial_refs,
470
+ "image_references": initial_images
458
471
  })
459
472
 
460
473
  # Add Crawler stage if Instruct used crawl_page
@@ -496,18 +509,24 @@ class ProcessingPipeline:
496
509
  a_model = a.get("model", "") or active_model
497
510
  a_base_url = a.get("base_url", "") or self.config.base_url
498
511
  steps = a.get("steps", [])
499
- agent_icon = getattr(self.config, "icon", None) or infer_icon(a_model, a_base_url)
512
+ agent_icon = infer_icon(a_model, a_base_url)
500
513
  agent_provider = infer_provider(a_base_url)
501
514
 
502
515
  for s in steps:
503
516
  if "tool_calls" in s:
504
517
  # 1. Agent Thought Stage (with LLM time)
518
+ # Calculate step cost
519
+ step_usage = s.get("usage", {})
520
+ step_cost = 0.0
521
+ if a_in_price > 0 or a_out_price > 0:
522
+ step_cost = (step_usage.get("input_tokens", 0) / 1_000_000 * a_in_price) + (step_usage.get("output_tokens", 0) / 1_000_000 * a_out_price)
523
+
505
524
  stages_used.append({
506
525
  "name": "Agent",
507
526
  "model": a_model,
508
527
  "icon_config": agent_icon,
509
528
  "provider": agent_provider,
510
- "time": s.get("llm_time", 0), "cost": 0
529
+ "time": s.get("llm_time", 0), "cost": step_cost
511
530
  })
512
531
 
513
532
  # 2. Grouped Tool Stages
@@ -587,11 +606,33 @@ class ProcessingPipeline:
587
606
  "time": 0, "cost": 0
588
607
  })
589
608
 
590
- # Assign total time/cost to last Agent stage
591
- last_agent = next((s for s in reversed(stages_used) if s["name"] == "Agent"), None)
592
- if last_agent:
593
- last_agent["time"] = a.get("time", 0)
594
- last_agent["cost"] = a.get("cost", 0.0)
609
+ # Assign total time/cost to last Agent stage
610
+ # Sum up total time/cost for UI/stats (implicit via loop above)
611
+ # No need to assign everything to last agent anymore as we distribute it.
612
+
613
+ # --- Final Filter: Only show cited items in workflow cards ---
614
+ cited_urls = {ref['url'] for ref in (structured.get("references", []) +
615
+ structured.get("page_references", []) +
616
+ structured.get("image_references", []))}
617
+
618
+ # Find images already rendered in markdown content (to avoid duplicate display)
619
+ markdown_image_urls = set()
620
+ md_img_pattern = re.compile(r'!\[.*?\]\((https?://[^)]+)\)')
621
+ for match in md_img_pattern.finditer(final_content):
622
+ markdown_image_urls.add(match.group(1))
623
+
624
+ for s in stages_used:
625
+ if "references" in s and s["references"]:
626
+ s["references"] = [r for r in s["references"] if r.get("url") in cited_urls]
627
+ # Filter out images already shown in markdown content
628
+ # Check both url AND thumbnail since either might be used in markdown
629
+ if "image_references" in s and s["image_references"]:
630
+ s["image_references"] = [
631
+ r for r in s["image_references"]
632
+ if r.get("url") not in markdown_image_urls and (r.get("thumbnail") or "") not in markdown_image_urls
633
+ ]
634
+ if "crawled_pages" in s and s["crawled_pages"]:
635
+ s["crawled_pages"] = [r for r in s["crawled_pages"] if r.get("url") in cited_urls]
595
636
 
596
637
  # Clean up conversation history: Remove tool calls and results to save tokens and avoid ID conflicts
597
638
  # Keep only 'user' messages and 'assistant' messages without tool_calls (final answers)
@@ -606,6 +647,67 @@ class ProcessingPipeline:
606
647
  # Update the reference (since it might be used by caller)
607
648
  current_history[:] = cleaned_history
608
649
 
650
+ # --- Apply cached images to reduce render time ---
651
+ # Collect all image URLs that need caching (avoid duplicates when thumbnail == url)
652
+ all_image_urls = set()
653
+ for img_ref in structured.get("image_references", []):
654
+ if img_ref.get("thumbnail"):
655
+ all_image_urls.add(img_ref["thumbnail"])
656
+ if img_ref.get("url"):
657
+ all_image_urls.add(img_ref["url"])
658
+
659
+ for stage in stages_used:
660
+ for img_ref in stage.get("image_references", []):
661
+ if img_ref.get("thumbnail"):
662
+ all_image_urls.add(img_ref["thumbnail"])
663
+ if img_ref.get("url"):
664
+ all_image_urls.add(img_ref["url"])
665
+
666
+ # Also collect image URLs from markdown content
667
+ markdown_img_pattern = re.compile(r'!\[.*?\]\((https?://[^)]+)\)')
668
+ markdown_urls = markdown_img_pattern.findall(final_content)
669
+ all_image_urls.update(markdown_urls)
670
+
671
+ # Get cached versions (waits for pending downloads, with timeout)
672
+ if all_image_urls:
673
+ try:
674
+ cached_map = await get_cached_images(list(all_image_urls), wait_timeout=3.0)
675
+
676
+ # Apply cached URLs to structured response
677
+ for img_ref in structured.get("image_references", []):
678
+ if img_ref.get("thumbnail") and img_ref["thumbnail"] in cached_map:
679
+ img_ref["thumbnail"] = cached_map[img_ref["thumbnail"]]
680
+ if img_ref.get("url") and img_ref["url"] in cached_map:
681
+ img_ref["url"] = cached_map[img_ref["url"]]
682
+
683
+ # Apply cached URLs to stages
684
+ for stage in stages_used:
685
+ for img_ref in stage.get("image_references", []):
686
+ if img_ref.get("thumbnail") and img_ref["thumbnail"] in cached_map:
687
+ img_ref["thumbnail"] = cached_map[img_ref["thumbnail"]]
688
+ if img_ref.get("url") and img_ref["url"] in cached_map:
689
+ img_ref["url"] = cached_map[img_ref["url"]]
690
+
691
+ # Replace image URLs in markdown content with cached versions
692
+ def replace_markdown_img(match):
693
+ full_match = match.group(0)
694
+ url = match.group(1)
695
+ cached_url = cached_map.get(url)
696
+ if cached_url and cached_url != url:
697
+ return full_match.replace(url, cached_url)
698
+ return full_match
699
+
700
+ final_content = markdown_img_pattern.sub(replace_markdown_img, final_content)
701
+ structured["response"] = markdown_img_pattern.sub(replace_markdown_img, structured.get("response", ""))
702
+
703
+ # Log cache stats
704
+ from .image_cache import get_image_cache
705
+ cache_stats = get_image_cache().get_stats()
706
+ logger.info(f"ImageCache stats: {cache_stats}")
707
+
708
+ except Exception as e:
709
+ logger.warning(f"Failed to apply image cache: {e}")
710
+
609
711
  return {
610
712
  "llm_response": final_content,
611
713
  "structured_response": structured,
@@ -627,11 +729,7 @@ class ProcessingPipeline:
627
729
  }
628
730
 
629
731
  def _parse_tagged_response(self, text: str) -> Dict[str, Any]:
630
- """Parse response and auto-infer references from [N] citations in body text.
631
-
632
- New simplified format:
633
- - Body text uses [1][2] format for citations
634
- - No ref code block needed - we auto-infer from citations
732
+ """Parse response and auto-infer references from citations and markdown images.
635
733
  """
636
734
  parsed = {"response": "", "references": [], "page_references": [], "image_references": [], "flow_steps": []}
637
735
  if not text:
@@ -639,9 +737,14 @@ class ProcessingPipeline:
639
737
 
640
738
  import re
641
739
 
642
- remaining_text = text
740
+ # 1. Strip trailing reference/source list
741
+ body_text = text
742
+ ref_list_pattern = re.compile(r'(?:\n\s*|^)\s*(?:#{1,3}|\*\*)\s*(?:References|Citations|Sources|参考资料|引用)[\s\S]*$', re.IGNORECASE | re.MULTILINE)
743
+ body_text = ref_list_pattern.sub('', body_text)
744
+
745
+ remaining_text = body_text.strip()
643
746
 
644
- # 1. Try to unwrap JSON if the model acted like a ReAct agent
747
+ # 2. Unwrap JSON if necessary
645
748
  try:
646
749
  if remaining_text.strip().startswith("{") and "action" in remaining_text:
647
750
  data = json.loads(remaining_text)
@@ -650,67 +753,114 @@ class ProcessingPipeline:
650
753
  except Exception:
651
754
  pass
652
755
 
653
- # 2. Extract all [N] citations from body text (scan left to right for order)
756
+ # 3. Identify all citations [N] and direct markdown images ![]()
757
+ cited_ids = []
654
758
  body_pattern = re.compile(r'\[(\d+)\]')
655
- id_order = [] # Preserve citation order
656
-
657
759
  for match in body_pattern.finditer(remaining_text):
658
760
  try:
659
- id_val = int(match.group(1))
660
- if id_val not in id_order:
661
- id_order.append(id_val)
662
- except ValueError:
663
- pass
664
-
665
- # 3. Build references by looking up cited IDs in all_web_results
666
- # Order by appearance in text
667
- old_to_new_map = {} # old_id -> new_id (for search & page only)
761
+ cited_ids.append(int(match.group(1)))
762
+ except ValueError: pass
763
+
764
+ # Also find direct URLs in ![]()
765
+ direct_image_urls = []
766
+ img_pattern = re.compile(r'!\[.*?\]\((.*?)\)')
767
+ for match in img_pattern.finditer(remaining_text):
768
+ url = match.group(1).strip()
769
+ if url and not url.startswith('['): # Not a [N] citation
770
+ direct_image_urls.append(url)
771
+
772
+ # 4. Build Citation Maps and Reference Lists
773
+ unified_id_map = {}
774
+ # Keep track of what we've already added to avoid duplicates
775
+ seen_urls = set()
668
776
 
777
+ # id_order needs to be unique and preserve appearance order
778
+ id_order = []
779
+ for id_val in cited_ids:
780
+ if id_val not in id_order:
781
+ id_order.append(id_val)
782
+
783
+ # Process [N] citations first to determine numbering
669
784
  for old_id in id_order:
670
- # Find in all_web_results by _id
671
785
  result_item = next((r for r in self.all_web_results if r.get("_id") == old_id), None)
786
+ if not result_item: continue
672
787
 
788
+ url = result_item.get("url", "")
789
+ item_type = result_item.get("_type", "")
790
+
791
+ entry = {
792
+ "title": result_item.get("title", ""),
793
+ "url": url,
794
+ "domain": result_item.get("domain", "")
795
+ }
796
+
797
+ if item_type == "search":
798
+ parsed["references"].append(entry)
799
+ unified_id_map[old_id] = len(parsed["references"]) + len(parsed["page_references"])
800
+ seen_urls.add(url)
801
+ elif item_type == "page":
802
+ parsed["page_references"].append(entry)
803
+ unified_id_map[old_id] = len(parsed["references"]) + len(parsed["page_references"])
804
+ seen_urls.add(url)
805
+ elif item_type == "image":
806
+ entry["thumbnail"] = result_item.get("thumbnail", "")
807
+ if url not in seen_urls:
808
+ parsed["image_references"].append(entry)
809
+ seen_urls.add(url)
810
+ # Note: Images cited as [N] might be used in text like ![...]([N])
811
+ # We'll handle this in replacement
812
+
813
+ # Now handle direct image URLs from ![]() that weren't cited as [N]
814
+ for url in direct_image_urls:
815
+ if url in seen_urls: continue
816
+ # Find in all_web_results
817
+ result_item = next((r for r in self.all_web_results if (r.get("url") == url or r.get("image") == url) and r.get("_type") == "image"), None)
673
818
  if result_item:
674
819
  entry = {
675
820
  "title": result_item.get("title", ""),
676
- "url": result_item.get("url", ""),
677
- "domain": result_item.get("domain", "")
821
+ "url": url,
822
+ "domain": result_item.get("domain", ""),
823
+ "thumbnail": result_item.get("thumbnail", "")
678
824
  }
679
-
680
- item_type = result_item.get("_type", "")
681
-
682
- # Auto-classify by type
683
- if item_type == "search":
684
- parsed["references"].append(entry)
685
- old_to_new_map[old_id] = len(parsed["references"])
686
- elif item_type == "page":
687
- parsed["page_references"].append(entry)
688
- old_to_new_map[old_id] = len(parsed["page_references"])
689
- elif item_type == "image":
690
- # Collect image but don't add to map (will be stripped from text)
691
- entry["thumbnail"] = result_item.get("thumbnail", "")
692
- parsed["image_references"].append(entry)
693
- # Note: no old_to_new_map entry - image citations will be removed
694
-
695
- # 4. Replace [old_id] with [new_id] in text, or remove if image
696
- def replace_id(match):
697
- try:
698
- old_id = int(match.group(1))
699
- new_id = old_to_new_map.get(old_id)
700
- if new_id is not None:
701
- return f"[{new_id}]"
702
- else:
703
- # Check if it's an image reference (not in map)
704
- item = next((r for r in self.all_web_results if r.get("_id") == old_id), None)
705
- if item and item.get("_type") == "image":
706
- return "" # Remove image citations from text
707
- except ValueError:
708
- pass
709
- return match.group(0)
825
+ parsed["image_references"].append(entry)
826
+ seen_urls.add(url)
710
827
 
711
- remaining_text = body_pattern.sub(replace_id, remaining_text)
828
+ # 5. Replacement Logic
829
+ # Define image replacement map separately to handle ![...]([N])
830
+ image_url_map = {} # old_id -> raw_url
831
+ for old_id in id_order:
832
+ item = next((r for r in self.all_web_results if r.get("_id") == old_id), None)
833
+ if item and item.get("_type") == "image":
834
+ image_url_map[old_id] = item.get("url", "")
835
+
836
+ def refined_replace(text):
837
+ # First, handle ![...]([N]) specifically
838
+ # We want to replace the [N] with the actual URL so the markdown renders
839
+ def sub_img_ref(match):
840
+ alt = match.group(1)
841
+ ref = match.group(2)
842
+ inner_match = body_pattern.match(ref)
843
+ if inner_match:
844
+ oid = int(inner_match.group(1))
845
+ if oid in image_url_map:
846
+ return f"![{alt}]({image_url_map[oid]})"
847
+ return match.group(0)
848
+
849
+ text = re.sub(r'!\[(.*?)\]\((.*?)\)', sub_img_ref, text)
850
+
851
+ # Then handle normal [N] replacements
852
+ def sub_norm_ref(match):
853
+ oid = int(match.group(1))
854
+ if oid in unified_id_map:
855
+ return f"[{unified_id_map[oid]}]"
856
+ if oid in image_url_map:
857
+ return "" # Remove standalone image citations like [5] if they aren't in ![]()
858
+ return match.group(0)
859
+
860
+ return body_pattern.sub(sub_norm_ref, text)
712
861
 
713
- parsed["response"] = remaining_text.strip()
862
+ final_text = refined_replace(remaining_text)
863
+ parsed["response"] = final_text.strip()
714
864
  return parsed
715
865
 
716
866
  async def _safe_route_tool(self, tool_call):
@@ -1053,4 +1203,4 @@ class ProcessingPipeline:
1053
1203
  except Exception:
1054
1204
  pass
1055
1205
  # Do NOT close shared crawler here, as pipeline instances are now per-request.
1056
- # Shared crawler lifecycle is managed by HYW.close() or global cleanup.
1206
+ # Shared crawler lifecycle is managed globally.
@@ -34,7 +34,7 @@ INSTRUCT_SP = """# 你是一个专业的指导专家.
34
34
  {tools_desc}
35
35
 
36
36
  ## 你的回复
37
- 调用工具后无需回复额外文本节省token.
37
+ 调用工具后无需回复额外文本节省 token.
38
38
 
39
39
  ## 用户消息
40
40
  ```
@@ -57,16 +57,19 @@ AGENT_SP = """# 你是一个 Agent 总控专家, 你需要理解用户意图,
57
57
 
58
58
  ## 过程要求
59
59
  当不调用工具发送文本, 即会变成最终回复, 请遵守:
60
- - 直接给出一篇报告, 无需回答用户消息
61
60
  - 语言: {language}, 百科式风格, 语言严谨不啰嗦.
62
61
  - 正文格式:
63
- - 使用 Markdown 格式, 支持 hightlight, katex
64
- - 最开始给出`# `大标题, 不要有多余废话, 不要直接回答用户的提问.
65
- - 内容丰富突出重点.
62
+ - 先给出一个 `# `大标题约 8-10 个字, 不要有多余废话, 不要直接回答用户的提问.
63
+ - 然后紧接着给出一个 <summary>...</summary>, 除了给出一个约 100 字的纯文本简介, 介绍本次输出的长文的清晰、重点概括.
64
+ - 随后开始详细二级标题 + markdown 正文, 语言描绘格式丰富多样, 简洁准确可信.
65
+ - 请不要给出过长的代码、表格列数等, 请控制字数在 600 字内, 只讲重点和准确的数据.
66
+ - 不支持渲染: 链接, 图片链接, mermaid
67
+ - 支持渲染: 公式, 代码高亮, 只在需要的时候给出.
68
+ - 图片链接、链接框架会自动渲染出, 你无需显式给出.
66
69
  - 引用:
67
70
  > 重要: 所有正文内容必须基于实际信息, 保证百分百真实度
68
71
  - 信息来源已按获取顺序编号为 [1], [2], [3]...
69
- - 正文中直接使用 [1][2] 格式引用, 只引用对回答有帮助的来源
72
+ - 正文中直接使用 [1] 格式引用, 只引用对回答有帮助的来源, 一次只能引用一个
70
73
  - 无需给出参考文献列表, 系统会自动生成
71
74
 
72
75
  ## 用户消息
@@ -96,24 +99,6 @@ AGENT_SP_INSTRUCT_VISION_ADD = """
96
99
  """
97
100
 
98
101
  AGENT_SP_SEARCH_ADD = """
99
- ## 搜索专家消息
100
- ```text
102
+ ## 联网信息
101
103
  {search_msgs}
102
- ```
103
- """
104
-
105
- AGENT_SP_PAGE_ADD = """
106
- ## 页面内容专家消息
107
- ```text
108
- {page_msgs}
109
- ```
110
- - 引用页面内容时, 必须使用 `page:id` 格式
111
- """
112
-
113
- AGENT_SP_IMAGE_SEARCH_ADD = """
114
- ## 图像搜索专家消息
115
- ```text
116
- {image_search_msgs}
117
- ```
118
- - 每进行一次 internal_image_search, 挑选 1 张图像插入正文
119
104
  """