entari-plugin-hyw: 3.3.7 (py3-none-any.whl) → 3.4.0 (py3-none-any.whl)

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of entari-plugin-hyw might be problematic. Click here for more details.

@@ -58,13 +58,11 @@ class HywConfig(BasicConfModel):
58
58
  base_url: str = "https://openrouter.ai/api/v1"
59
59
  vision_model_name: Optional[str] = None
60
60
  vision_api_key: Optional[str] = None
61
+ language: str = "Simplified Chinese"
61
62
  vision_base_url: Optional[str] = None
62
- vision_system_prompt: Optional[str] = None
63
- intruct_model_name: Optional[str] = None
64
- intruct_api_key: Optional[str] = None
65
- intruct_base_url: Optional[str] = None
66
- intruct_system_prompt: Optional[str] = None
67
- agent_system_prompt: Optional[str] = None
63
+ instruct_model_name: Optional[str] = None
64
+ instruct_api_key: Optional[str] = None
65
+ instruct_base_url: Optional[str] = None
68
66
  search_base_url: str = "https://lite.duckduckgo.com/lite/?q={query}"
69
67
  image_search_base_url: str = "https://duckduckgo.com/?q={query}&iax=images&ia=images"
70
68
  headless: bool = False
@@ -72,8 +70,10 @@ class HywConfig(BasicConfModel):
72
70
  icon: str = "openai"
73
71
  render_timeout_ms: int = 6000
74
72
  extra_body: Optional[Dict[str, Any]] = None
73
+ vision_extra_body: Optional[Dict[str, Any]] = None
74
+ instruct_extra_body: Optional[Dict[str, Any]] = None
75
75
  enable_browser_fallback: bool = False
76
- reaction: bool = True
76
+ reaction: bool = False
77
77
  quote: bool = True
78
78
  temperature: float = 0.4
79
79
  # Billing configuration (price per million tokens)
@@ -83,14 +83,14 @@ class HywConfig(BasicConfModel):
83
83
  vision_input_price: Optional[float] = None
84
84
  vision_output_price: Optional[float] = None
85
85
  # Instruct model pricing overrides (defaults to main model pricing if not set)
86
- intruct_input_price: Optional[float] = None
87
- intruct_output_price: Optional[float] = None
86
+ instruct_input_price: Optional[float] = None
87
+ instruct_output_price: Optional[float] = None
88
88
  # Provider Names
89
89
  search_name: str = "DuckDuckGo"
90
90
  search_provider: str = "crawl4ai" # crawl4ai | httpx | ddgs
91
91
  model_provider: Optional[str] = None
92
92
  vision_model_provider: Optional[str] = None
93
- intruct_model_provider: Optional[str] = None
93
+ instruct_model_provider: Optional[str] = None
94
94
 
95
95
 
96
96
 
Binary file
@@ -12,27 +12,27 @@ class HYWConfig:
12
12
  fusion_mode: bool = False
13
13
  save_conversation: bool = False
14
14
  headless: bool = True
15
- intruct_model_name: Optional[str] = None
16
- intruct_api_key: Optional[str] = None
17
- intruct_base_url: Optional[str] = None
15
+ instruct_model_name: Optional[str] = None
16
+ instruct_api_key: Optional[str] = None
17
+ instruct_base_url: Optional[str] = None
18
18
  search_base_url: str = "https://lite.duckduckgo.com/lite/?q={query}"
19
19
  image_search_base_url: str = "https://duckduckgo.com/?q={query}&iax=images&ia=images"
20
20
  search_params: Optional[str] = None # e.g. "&kl=cn-zh" for China region
21
21
  search_limit: int = 8
22
22
  extra_body: Optional[Dict[str, Any]] = None
23
+ vision_extra_body: Optional[Dict[str, Any]] = None
24
+ instruct_extra_body: Optional[Dict[str, Any]] = None
23
25
  temperature: float = 0.4
24
26
  max_turns: int = 10
25
27
  icon: str = "openai" # logo for primary model
26
28
  vision_icon: Optional[str] = None # logo for vision model (falls back to icon when absent)
27
29
  instruct_icon: Optional[str] = None # logo for instruct model
28
30
  enable_browser_fallback: bool = False
29
- vision_system_prompt: Optional[str] = None
30
- intruct_system_prompt: Optional[str] = None
31
- agent_system_prompt: Optional[str] = None
31
+ language: str = "Simplified Chinese"
32
32
  input_price: Optional[float] = None # $ per 1M input tokens
33
33
  output_price: Optional[float] = None # $ per 1M output tokens
34
34
  vision_input_price: Optional[float] = None
35
35
  vision_output_price: Optional[float] = None
36
- intruct_input_price: Optional[float] = None
37
- intruct_output_price: Optional[float] = None
36
+ instruct_input_price: Optional[float] = None
37
+ instruct_output_price: Optional[float] = None
38
38
 
@@ -12,14 +12,14 @@ from .config import HYWConfig
12
12
  from ..utils.search import SearchService
13
13
  from ..utils.prompts import (
14
14
  AGENT_SP,
15
- AGENT_SP_INTRUCT_VISION_ADD,
15
+ AGENT_SP_INSTRUCT_VISION_ADD,
16
16
  AGENT_SP_TOOLS_STANDARD_ADD,
17
17
  AGENT_SP_TOOLS_AGENT_ADD,
18
18
  AGENT_SP_SEARCH_ADD,
19
19
  AGENT_SP_PAGE_ADD,
20
20
  AGENT_SP_IMAGE_SEARCH_ADD,
21
- INTRUCT_SP,
22
- INTRUCT_SP_VISION_ADD,
21
+ INSTRUCT_SP,
22
+ INSTRUCT_SP_VISION_ADD,
23
23
  VISION_SP,
24
24
  )
25
25
 
@@ -39,10 +39,8 @@ class ProcessingPipeline:
39
39
  self.client = AsyncOpenAI(base_url=self.config.base_url, api_key=self.config.api_key)
40
40
  self.all_web_results = [] # Cache for search results
41
41
  self.current_mode = "standard" # standard | agent
42
- # Independent ID counters for each type
43
- self.search_id_counter = 0
44
- self.page_id_counter = 0
45
- self.image_id_counter = 0
42
+ # Global ID counter for all types (unified numbering)
43
+ self.global_id_counter = 0
46
44
 
47
45
  self.web_search_tool = {
48
46
  "type": "function",
@@ -109,7 +107,7 @@ class ProcessingPipeline:
109
107
  ) -> Dict[str, Any]:
110
108
  """
111
109
  1) Vision: summarize images once (no image persistence).
112
- 2) Intruct: run web_search and decide whether to grant Playwright MCP tools.
110
+ 2) Instruct: run web_search and decide whether to grant Playwright MCP tools.
113
111
  3) Agent: normally no tools; if granted, allow Playwright MCP tools (max 6 rounds; step 5 nudge, step 6 forced).
114
112
  """
115
113
  start_time = time.time()
@@ -133,7 +131,7 @@ class ProcessingPipeline:
133
131
 
134
132
  trace: Dict[str, Any] = {
135
133
  "vision": None,
136
- "intruct": None,
134
+ "instruct": None,
137
135
  "agent": None,
138
136
  }
139
137
 
@@ -150,8 +148,7 @@ class ProcessingPipeline:
150
148
  or getattr(self.config, "vision_model_name", None)
151
149
  or active_model
152
150
  )
153
- vision_prompt_tpl = getattr(self.config, "vision_system_prompt", None) or VISION_SP
154
- vision_prompt = vision_prompt_tpl.format(user_msgs=user_input or "[图片]")
151
+ vision_prompt = VISION_SP.format(user_msgs=user_input or "[图片]")
155
152
  vision_text, vision_usage = await self._run_vision_stage(
156
153
  user_input=user_input,
157
154
  images=images,
@@ -182,10 +179,10 @@ class ProcessingPipeline:
182
179
  "cost": vision_cost
183
180
  }
184
181
 
185
- # Intruct + pre-search
182
+ # Instruct + pre-search
186
183
  instruct_start = time.time()
187
- instruct_model = getattr(self.config, "intruct_model_name", None) or active_model
188
- instruct_text, search_payloads, intruct_trace, intruct_usage, search_time = await self._run_instruct_stage(
184
+ instruct_model = getattr(self.config, "instruct_model_name", None) or active_model
185
+ instruct_text, search_payloads, instruct_trace, instruct_usage, search_time = await self._run_instruct_stage(
189
186
  user_input=user_input,
190
187
  vision_text=vision_text,
191
188
  model=instruct_model,
@@ -194,24 +191,24 @@ class ProcessingPipeline:
194
191
 
195
192
  # Calculate Instruct Cost
196
193
  instruct_cost = 0.0
197
- i_in_price = float(getattr(self.config, "intruct_input_price", None) or getattr(self.config, "input_price", 0.0) or 0.0)
198
- i_out_price = float(getattr(self.config, "intruct_output_price", None) or getattr(self.config, "output_price", 0.0) or 0.0)
194
+ i_in_price = float(getattr(self.config, "instruct_input_price", None) or getattr(self.config, "input_price", 0.0) or 0.0)
195
+ i_out_price = float(getattr(self.config, "instruct_output_price", None) or getattr(self.config, "output_price", 0.0) or 0.0)
199
196
  if i_in_price > 0 or i_out_price > 0:
200
- instruct_cost = (intruct_usage.get("input_tokens", 0) / 1_000_000 * i_in_price) + (intruct_usage.get("output_tokens", 0) / 1_000_000 * i_out_price)
197
+ instruct_cost = (instruct_usage.get("input_tokens", 0) / 1_000_000 * i_in_price) + (instruct_usage.get("output_tokens", 0) / 1_000_000 * i_out_price)
201
198
 
202
199
  # Add instruct usage
203
- usage_totals["input_tokens"] += intruct_usage.get("input_tokens", 0)
204
- usage_totals["output_tokens"] += intruct_usage.get("output_tokens", 0)
200
+ usage_totals["input_tokens"] += instruct_usage.get("input_tokens", 0)
201
+ usage_totals["output_tokens"] += instruct_usage.get("output_tokens", 0)
205
202
 
206
- intruct_trace["time"] = instruct_time
207
- intruct_trace["cost"] = instruct_cost
208
- trace["intruct"] = intruct_trace
203
+ instruct_trace["time"] = instruct_time
204
+ instruct_trace["cost"] = instruct_cost
205
+ trace["instruct"] = instruct_trace
209
206
 
210
207
  # Start agent loop
211
208
  agent_start_time = time.time()
212
209
  current_history.append({"role": "user", "content": user_input or "..."})
213
210
 
214
- mode = intruct_trace.get("mode", self.current_mode).lower()
211
+ mode = instruct_trace.get("mode", self.current_mode).lower()
215
212
  logger.success(f"Instruct Mode: {mode}")
216
213
  self.current_mode = mode
217
214
 
@@ -255,18 +252,17 @@ class ProcessingPipeline:
255
252
  has_image_results = any(r.get("_type") == "image" for r in self.all_web_results)
256
253
 
257
254
  # Build agent system prompt
258
- agent_prompt_tpl = getattr(self.config, "agent_system_prompt", None) or AGENT_SP
259
-
260
255
  mode_desc_text = AGENT_SP_TOOLS_AGENT_ADD.format(tools_desc=tools_desc) if mode == "agent" else AGENT_SP_TOOLS_STANDARD_ADD
261
- system_prompt = agent_prompt_tpl.format(
256
+ system_prompt = AGENT_SP.format(
262
257
  user_msgs=user_msgs_text,
263
258
  mode=mode,
264
- mode_desc=mode_desc_text
259
+ mode_desc=mode_desc_text,
260
+ language=getattr(self.config, "language", "Simplified Chinese")[:128]
265
261
  )
266
262
 
267
263
  # Append vision text if available
268
264
  if vision_text:
269
- system_prompt += AGENT_SP_INTRUCT_VISION_ADD.format(vision_msgs=vision_text)
265
+ system_prompt += AGENT_SP_INSTRUCT_VISION_ADD.format(vision_msgs=vision_text)
270
266
 
271
267
  # Append search results
272
268
  if has_search_results and search_msgs_text:
@@ -299,6 +295,7 @@ class ProcessingPipeline:
299
295
  model=active_model,
300
296
  tools=tools_for_step,
301
297
  tool_choice="auto" if tools_for_step else None,
298
+ extra_body=self.config.extra_body,
302
299
  )
303
300
  step_llm_time = time.time() - step_llm_start
304
301
 
@@ -366,8 +363,8 @@ class ProcessingPipeline:
366
363
  a_in_price = float(getattr(self.config, "input_price", 0.0) or 0.0)
367
364
  a_out_price = float(getattr(self.config, "output_price", 0.0) or 0.0)
368
365
 
369
- agent_input_tokens = usage_totals["input_tokens"] - vision_usage.get("input_tokens", 0) - intruct_usage.get("input_tokens", 0)
370
- agent_output_tokens = usage_totals["output_tokens"] - vision_usage.get("output_tokens", 0) - intruct_usage.get("output_tokens", 0)
366
+ agent_input_tokens = usage_totals["input_tokens"] - vision_usage.get("input_tokens", 0) - instruct_usage.get("input_tokens", 0)
367
+ agent_output_tokens = usage_totals["output_tokens"] - vision_usage.get("output_tokens", 0) - instruct_usage.get("output_tokens", 0)
371
368
 
372
369
  if a_in_price > 0 or a_out_price > 0:
373
370
  agent_cost = (max(0, agent_input_tokens) / 1_000_000 * a_in_price) + (max(0, agent_output_tokens) / 1_000_000 * a_out_price)
@@ -436,14 +433,14 @@ class ProcessingPipeline:
436
433
  "cost": v.get("cost", 0.0)
437
434
  })
438
435
 
439
- if trace.get("intruct"):
440
- i = trace["intruct"]
436
+ if trace.get("instruct"):
437
+ i = trace["instruct"]
441
438
  i_model = i.get("model", "")
442
439
  i_base_url = i.get("base_url", "") or self.config.base_url
443
440
  stages_used.append({
444
441
  "name": "Instruct",
445
442
  "model": i_model,
446
- "icon_config": getattr(self.config, "instruct_icon", None) or getattr(self.config, "intruct_icon", None) or infer_icon(i_model, i_base_url),
443
+ "icon_config": getattr(self.config, "instruct_icon", None) or infer_icon(i_model, i_base_url),
447
444
  "provider": infer_provider(i_base_url),
448
445
  "time": i.get("time", 0),
449
446
  "cost": i.get("cost", 0.0)
@@ -460,9 +457,9 @@ class ProcessingPipeline:
460
457
  })
461
458
 
462
459
  # Add Crawler stage if Instruct used crawl_page
463
- if trace.get("intruct"):
464
- intruct_tool_calls = trace["intruct"].get("tool_calls", [])
465
- crawl_calls = [tc for tc in intruct_tool_calls if tc.get("name") == "crawl_page"]
460
+ if trace.get("instruct"):
461
+ instruct_tool_calls = trace["instruct"].get("tool_calls", [])
462
+ crawl_calls = [tc for tc in instruct_tool_calls if tc.get("name") == "crawl_page"]
466
463
  if crawl_calls:
467
464
  # Build crawled_pages list for UI
468
465
  crawled_pages = []
@@ -629,7 +626,12 @@ class ProcessingPipeline:
629
626
  }
630
627
 
631
628
  def _parse_tagged_response(self, text: str) -> Dict[str, Any]:
632
- """Parse response for references and page references reordered by appearance."""
629
+ """Parse response and auto-infer references from [N] citations in body text.
630
+
631
+ New simplified format:
632
+ - Body text uses [1][2] format for citations
633
+ - No ref code block needed - we auto-infer from citations
634
+ """
633
635
  parsed = {"response": "", "references": [], "page_references": [], "image_references": [], "flow_steps": []}
634
636
  if not text:
635
637
  return parsed
@@ -647,104 +649,65 @@ class ProcessingPipeline:
647
649
  except Exception:
648
650
  pass
649
651
 
650
- # 2. Extract references from text first (Order by appearance)
651
- # Pattern matches [search:123], [page:123], [image:123]
652
- pattern = re.compile(r'\[(search|page|image):(\d+)\]', re.IGNORECASE)
653
-
654
- matches = list(pattern.finditer(remaining_text))
652
+ # 2. Extract all [N] citations from body text (scan left to right for order)
653
+ body_pattern = re.compile(r'\[(\d+)\]')
654
+ id_order = [] # Preserve citation order
655
655
 
656
- search_map = {} # old_id_str -> new_id (int)
657
- page_map = {}
658
- image_map = {}
656
+ for match in body_pattern.finditer(remaining_text):
657
+ try:
658
+ id_val = int(match.group(1))
659
+ if id_val not in id_order:
660
+ id_order.append(id_val)
661
+ except ValueError:
662
+ pass
663
+
664
+ # 3. Build references by looking up cited IDs in all_web_results
665
+ # Order by appearance in text
666
+ old_to_new_map = {} # old_id -> new_id (for search & page only)
659
667
 
660
- def process_ref(tag_type, old_id):
661
- # Find in all_web_results
662
- result_item = next((r for r in self.all_web_results if r.get("_id") == old_id and r.get("_type") == tag_type), None)
668
+ for old_id in id_order:
669
+ # Find in all_web_results by _id
670
+ result_item = next((r for r in self.all_web_results if r.get("_id") == old_id), None)
663
671
 
664
- if not result_item:
665
- return
672
+ if result_item:
673
+ entry = {
674
+ "title": result_item.get("title", ""),
675
+ "url": result_item.get("url", ""),
676
+ "domain": result_item.get("domain", "")
677
+ }
666
678
 
667
- entry = {
668
- "title": result_item.get("title", ""),
669
- "url": result_item.get("url", ""),
670
- "domain": result_item.get("domain", "")
671
- }
672
- if tag_type == "image":
673
- entry["thumbnail"] = result_item.get("thumbnail", "")
674
-
675
- # Add to respective list and map
676
- # Check maps to avoid duplicates
677
- if tag_type == "search":
678
- if str(old_id) not in search_map:
679
+ item_type = result_item.get("_type", "")
680
+
681
+ # Auto-classify by type
682
+ if item_type == "search":
679
683
  parsed["references"].append(entry)
680
- search_map[str(old_id)] = len(parsed["references"])
681
- elif tag_type == "page":
682
- if str(old_id) not in page_map:
684
+ old_to_new_map[old_id] = len(parsed["references"])
685
+ elif item_type == "page":
683
686
  parsed["page_references"].append(entry)
684
- page_map[str(old_id)] = len(parsed["page_references"])
685
- elif tag_type == "image":
686
- if str(old_id) not in image_map:
687
+ old_to_new_map[old_id] = len(parsed["page_references"])
688
+ elif item_type == "image":
689
+ # Collect image but don't add to map (will be stripped from text)
690
+ entry["thumbnail"] = result_item.get("thumbnail", "")
687
691
  parsed["image_references"].append(entry)
688
- image_map[str(old_id)] = len(parsed["image_references"])
692
+ # Note: no old_to_new_map entry - image citations will be removed
689
693
 
690
- # Pass 1: Text Body
691
- for m in matches:
694
+ # 4. Replace [old_id] with [new_id] in text, or remove if image
695
+ def replace_id(match):
692
696
  try:
693
- process_ref(m.group(1).lower(), int(m.group(2)))
694
- except ValueError:
695
- continue
696
-
697
- # 3. Pass 2: References Block (Capture items missed in text)
698
- ref_block_match = re.search(r'```references\s*(.*?)\s*```', remaining_text, re.DOTALL | re.IGNORECASE)
699
- if ref_block_match:
700
- ref_content = ref_block_match.group(1).strip()
701
- remaining_text = remaining_text.replace(ref_block_match.group(0), "").strip()
702
-
703
- for line in ref_content.split("\n"):
704
- line = line.strip()
705
- if not line: continue
706
- # Match [id] [type]
707
- # e.g. [1] [image] ... or [image:1] ...
708
-
709
- # Check for [id] [type] format
710
- id_match = re.match(r"^\[(\d+)\]\s*\[(search|page|image)\]", line, re.IGNORECASE)
711
- if id_match:
712
- try:
713
- process_ref(id_match.group(2).lower(), int(id_match.group(1)))
714
- except ValueError:
715
- pass
697
+ old_id = int(match.group(1))
698
+ new_id = old_to_new_map.get(old_id)
699
+ if new_id is not None:
700
+ return f"[{new_id}]"
716
701
  else:
717
- # Check for [type:id] format in list
718
- alt_match = re.match(r"^\[(search|page|image):(\d+)\]", line, re.IGNORECASE)
719
- if alt_match:
720
- try:
721
- process_ref(alt_match.group(1).lower(), int(alt_match.group(2)))
722
- except ValueError:
723
- pass
724
-
725
- # 4. Replace tags in text with new sequential IDs
726
-
727
- # 4. Replace tags in text with new sequential IDs
728
- def replace_tag(match):
729
- tag_type = match.group(1).lower()
730
- old_id = match.group(2)
731
-
732
- new_id = None
733
- if tag_type == "search":
734
- new_id = search_map.get(old_id)
735
- elif tag_type == "page":
736
- new_id = page_map.get(old_id)
737
- elif tag_type == "image":
738
- new_id = image_map.get(old_id)
739
-
740
- if new_id is not None:
741
- if tag_type == "image":
742
- return ""
743
- return f"[{tag_type}:{new_id}]"
744
-
702
+ # Check if it's an image reference (not in map)
703
+ item = next((r for r in self.all_web_results if r.get("_id") == old_id), None)
704
+ if item and item.get("_type") == "image":
705
+ return "" # Remove image citations from text
706
+ except ValueError:
707
+ pass
745
708
  return match.group(0)
746
709
 
747
- remaining_text = pattern.sub(replace_tag, remaining_text)
710
+ remaining_text = body_pattern.sub(replace_id, remaining_text)
748
711
 
749
712
  parsed["response"] = remaining_text.strip()
750
713
  return parsed
@@ -767,10 +730,10 @@ class ProcessingPipeline:
767
730
  query = args.get("query")
768
731
  web = await self.search_service.search(query)
769
732
 
770
- # Cache results and assign search-specific IDs
733
+ # Cache results and assign global IDs
771
734
  for item in web:
772
- self.search_id_counter += 1
773
- item["_id"] = self.search_id_counter
735
+ self.global_id_counter += 1
736
+ item["_id"] = self.global_id_counter
774
737
  item["_type"] = "search"
775
738
  item["query"] = query
776
739
  self.all_web_results.append(item)
@@ -781,10 +744,10 @@ class ProcessingPipeline:
781
744
  query = args.get("query")
782
745
  images = await self.search_service.image_search(query)
783
746
 
784
- # Cache results and assign image-specific IDs
747
+ # Cache results and assign global IDs
785
748
  for item in images:
786
- self.image_id_counter += 1
787
- item["_id"] = self.image_id_counter
749
+ self.global_id_counter += 1
750
+ item["_id"] = self.global_id_counter
788
751
  item["_type"] = "image"
789
752
  item["query"] = query
790
753
  item["is_image"] = True
@@ -798,11 +761,11 @@ class ProcessingPipeline:
798
761
  # Returns Dict: {content, title, url}
799
762
  result_dict = await self.search_service.fetch_page(url)
800
763
 
801
- # Cache the crawled content with page-specific ID
802
- self.page_id_counter += 1
764
+ # Cache the crawled content with global ID
765
+ self.global_id_counter += 1
803
766
 
804
767
  cached_item = {
805
- "_id": self.page_id_counter,
768
+ "_id": self.global_id_counter,
806
769
  "_type": "page",
807
770
  "title": result_dict.get("title", "Page"),
808
771
  "url": result_dict.get("url", url),
@@ -828,10 +791,10 @@ class ProcessingPipeline:
828
791
  return f"Unknown tool {name}"
829
792
 
830
793
 
831
- async def _safe_llm_call(self, messages, model, tools=None, tool_choice=None, client: Optional[AsyncOpenAI] = None):
794
+ async def _safe_llm_call(self, messages, model, tools=None, tool_choice=None, client: Optional[AsyncOpenAI] = None, extra_body: Optional[Dict[str, Any]] = None):
832
795
  try:
833
796
  return await asyncio.wait_for(
834
- self._do_llm_request(messages, model, tools, tool_choice, client=client or self.client),
797
+ self._do_llm_request(messages, model, tools, tool_choice, client=client or self.client, extra_body=extra_body),
835
798
  timeout=120.0,
836
799
  )
837
800
  except asyncio.TimeoutError:
@@ -841,7 +804,7 @@ class ProcessingPipeline:
841
804
  logger.error(f"LLM Call Failed: {e}")
842
805
  return type("obj", (object,), {"content": f"Error: Model failure ({e})", "tool_calls": None})(), {"input_tokens": 0, "output_tokens": 0}
843
806
 
844
- async def _do_llm_request(self, messages, model, tools, tool_choice, client: AsyncOpenAI):
807
+ async def _do_llm_request(self, messages, model, tools, tool_choice, client: AsyncOpenAI, extra_body: Optional[Dict[str, Any]] = None):
845
808
  try:
846
809
  payload_debug = json.dumps(messages)
847
810
  logger.info(f"LLM Request Payload Size: {len(payload_debug)} chars")
@@ -856,6 +819,7 @@ class ProcessingPipeline:
856
819
  tools=tools,
857
820
  tool_choice=tool_choice,
858
821
  temperature=self.config.temperature,
822
+ extra_body=extra_body,
859
823
  )
860
824
  logger.info(f"LLM Request RECEIVED after {time.time() - t0:.2f}s")
861
825
 
@@ -880,6 +844,7 @@ class ProcessingPipeline:
880
844
  messages=[{"role": "system", "content": prompt}, {"role": "user", "content": content_payload}],
881
845
  model=model,
882
846
  client=client,
847
+ extra_body=getattr(self.config, "vision_extra_body", None),
883
848
  )
884
849
  return (response.content or "").strip(), usage
885
850
 
@@ -891,15 +856,14 @@ class ProcessingPipeline:
891
856
  tools = [self.web_search_tool, self.image_search_tool, self.set_mode_tool, self.crawl_page_tool]
892
857
  tools_desc = "- internal_web_search: 搜索文本\n- internal_image_search: 搜索图片\n- crawl_page: 获取网页内容\n- set_mode: 设定standard/agent模式"
893
858
 
894
- prompt_tpl = getattr(self.config, "intruct_system_prompt", None) or INTRUCT_SP
895
- prompt = prompt_tpl.format(user_msgs=user_input or "", tools_desc=tools_desc)
859
+ prompt = INSTRUCT_SP.format(user_msgs=user_input or "", tools_desc=tools_desc)
896
860
 
897
861
  if vision_text:
898
- prompt = f"{prompt}\\n\\n{INTRUCT_SP_VISION_ADD.format(vision_msgs=vision_text)}"
862
+ prompt = f"{prompt}\\n\\n{INSTRUCT_SP_VISION_ADD.format(vision_msgs=vision_text)}"
899
863
 
900
864
  client = self._client_for(
901
- api_key=getattr(self.config, "intruct_api_key", None),
902
- base_url=getattr(self.config, "intruct_base_url", None),
865
+ api_key=getattr(self.config, "instruct_api_key", None),
866
+ base_url=getattr(self.config, "instruct_base_url", None),
903
867
  )
904
868
 
905
869
  history: List[Dict[str, Any]] = [
@@ -913,12 +877,13 @@ class ProcessingPipeline:
913
877
  tools=tools,
914
878
  tool_choice="auto",
915
879
  client=client,
880
+ extra_body=getattr(self.config, "instruct_extra_body", None),
916
881
  )
917
882
 
918
883
  search_payloads: List[str] = []
919
- intruct_trace: Dict[str, Any] = {
884
+ instruct_trace: Dict[str, Any] = {
920
885
  "model": model,
921
- "base_url": getattr(self.config, "intruct_base_url", None) or self.config.base_url,
886
+ "base_url": getattr(self.config, "instruct_base_url", None) or self.config.base_url,
922
887
  "prompt": prompt,
923
888
  "user_input": user_input or "",
924
889
  "vision_add": vision_text or "",
@@ -946,8 +911,8 @@ class ProcessingPipeline:
946
911
  history.append(
947
912
  {"tool_call_id": tc.id, "role": "tool", "name": tc.function.name, "content": str(result)}
948
913
  )
949
- intruct_trace["tool_calls"].append(self._tool_call_to_trace(tc))
950
- intruct_trace["tool_results"].append({"name": tc.function.name, "content": str(result)})
914
+ instruct_trace["tool_calls"].append(self._tool_call_to_trace(tc))
915
+ instruct_trace["tool_results"].append({"name": tc.function.name, "content": str(result)})
951
916
 
952
917
  if tc.function.name in ["web_search", "internal_web_search"]:
953
918
  search_payloads.append(str(result))
@@ -959,18 +924,18 @@ class ProcessingPipeline:
959
924
  mode = args.get("mode", mode)
960
925
  mode_reason = args.get("reason", "")
961
926
 
962
- intruct_trace["mode"] = mode
927
+ instruct_trace["mode"] = mode
963
928
  if mode_reason:
964
- intruct_trace["mode_reason"] = mode_reason
929
+ instruct_trace["mode_reason"] = mode_reason
965
930
 
966
- intruct_trace["output"] = ""
967
- intruct_trace["usage"] = usage
968
- return "", search_payloads, intruct_trace, usage, search_time
931
+ instruct_trace["output"] = ""
932
+ instruct_trace["usage"] = usage
933
+ return "", search_payloads, instruct_trace, usage, search_time
969
934
 
970
- intruct_trace["mode"] = mode
971
- intruct_trace["output"] = (response.content or "").strip()
972
- intruct_trace["usage"] = usage
973
- return "", search_payloads, intruct_trace, usage, 0.0
935
+ instruct_trace["mode"] = mode
936
+ instruct_trace["output"] = (response.content or "").strip()
937
+ instruct_trace["usage"] = usage
938
+ return "", search_payloads, instruct_trace, usage, 0.0
974
939
 
975
940
  def _format_search_msgs(self) -> str:
976
941
  """Format search snippets only (not crawled pages)."""
@@ -1050,9 +1015,9 @@ class ProcessingPipeline:
1050
1015
  parts.append(fence("text", v.get("output", "")))
1051
1016
  parts.append("")
1052
1017
 
1053
- if trace.get("intruct"):
1054
- t = trace["intruct"]
1055
- parts.append("## Intruct\n")
1018
+ if trace.get("instruct"):
1019
+ t = trace["instruct"]
1020
+ parts.append("## Instruct\n")
1056
1021
  parts.append(f"- model: `{t.get('model')}`")
1057
1022
  parts.append(f"- base_url: `{t.get('base_url')}`\n")
1058
1023
  parts.append("### Prompt\n")
@@ -266,18 +266,27 @@ class ContentRenderer:
266
266
 
267
267
  content_html = restore_math(content_html)
268
268
 
269
- # Convert [search:N] to blue badge
270
- content_html = re.sub(
271
- r'\[search:(\d+)\]',
272
- r'<span class="inline-flex items-center justify-center min-w-[16px] h-4 px-0.5 text-[10px] font-bold text-blue-600 bg-blue-50 border border-blue-200 rounded mx-0.5 align-top relative -top-0.5">\1</span>',
273
- content_html
274
- )
275
- # Convert [page:N] to orange badge
276
- content_html = re.sub(
277
- r'\[page:(\d+)\]',
278
- r'<span class="inline-flex items-center justify-center min-w-[16px] h-4 px-0.5 text-[10px] font-bold text-orange-700 bg-orange-50 border border-orange-200 rounded mx-0.5 align-top relative -top-0.5">\1</span>',
279
- content_html
280
- )
269
+ # Convert [N] to colored badges based on index position
270
+ # - Numbers 1 to len(references) → blue (search results)
271
+ # - Numbers len(references)+1 to len(references)+len(page_references) → orange (page content)
272
+
273
+ num_search_refs = len(references) if references else 0
274
+ num_page_refs = len(page_references) if page_references else 0
275
+
276
+ def replace_badge(match):
277
+ n = int(match.group(1))
278
+ if 1 <= n <= num_search_refs:
279
+ # Blue badge for search results
280
+ return f'<span class="inline-flex items-center justify-center min-w-[16px] h-4 px-0.5 text-[10px] font-bold text-blue-600 bg-blue-50 border border-blue-200 rounded mx-0.5 align-top relative -top-0.5">{n}</span>'
281
+ elif num_search_refs < n <= num_search_refs + num_page_refs:
282
+ # Orange badge for page content (renumber from 1)
283
+ page_num = n - num_search_refs
284
+ return f'<span class="inline-flex items-center justify-center min-w-[16px] h-4 px-0.5 text-[10px] font-bold text-orange-700 bg-orange-50 border border-orange-200 rounded mx-0.5 align-top relative -top-0.5">{page_num}</span>'
285
+ else:
286
+ # Fallback: keep original if out of range
287
+ return match.group(0)
288
+
289
+ content_html = re.sub(r'\[(\d+)\]', replace_badge, content_html)
281
290
 
282
291
  # Strip out the references code block if it leaked into the content
283
292
  content_html = re.sub(r'<pre><code[^>]*>.*?references.*?</code></pre>\s*$', '', content_html, flags=re.DOTALL | re.IGNORECASE)
@@ -1,140 +1,116 @@
1
- VISION_SP = """# 你是一个专业的视觉转文字专家.
1
+ VISION_SP = """# You are a professional vision-to-text expert.
2
2
 
3
- # 核心任务
4
- - 智能分析图片内容, 转述成文本, 除此之外不要添加任何内容
5
- - 文字优先: 若包含清晰文字(文档、截图等), 必须完整准确转录, 不要遗漏.
6
- - 视觉补充: 解释完文字后, 描述视觉内容总结(物体、场景、氛围).
7
- - 用户要求: 根据用户消息中提示侧重转文本的偏向, 若无关联则不理会.
3
+ # Core Tasks
4
+ - Intelligently analyze image content and paraphrase it into text. Do not add any other content.
5
+ - Text Priority: If there is clear text (documents, screenshots, etc.), it must be transcribed completely and accurately, without omission.
6
+ - Visual Supplement: After explaining the text, describe the visual content summary (objects, scenes, atmosphere).
7
+ - User Requirements: Let any hints in the user message guide what the transcription emphasizes; ignore the message if it is irrelevant.
8
8
 
9
- ## 用户消息
9
+ ## User Message
10
10
  ```text
11
11
  {user_msgs}
12
12
  ```
13
13
  """
14
14
 
15
- INTRUCT_SP = """# 你是一个专业的指导专家.
16
-
17
- ## 核心任务
18
- - 决定预处理工具:
19
- - 用户消息包含链接: 调用 crawl_page 获取内容, 无需其他工具
20
- - 用户消息包含典型名词、可能的专有名词组合: 调用 internal_web_search
21
- - 提炼出关键词搜索关键词本身, 不添加任何其他助词, 搜索效果最好
22
- - 如果用户消息关键词清晰, 使用图片搜索能搜索出诸如海报、地标、物品、角色立绘等, 调用 internal_image_search
23
- - 用户消息不需要搜索: 不调用工具
24
- - 调用 set_mode:
25
- - 绝大部分常规问题: standard
26
- - 用户要求研究/深度搜索: agent
27
- - 需要获取页面具体信息才能回答问题: agent
28
- > 所有工具需要在本次对话同时调用
29
-
30
- ## 调用工具
31
- - 使用工具时, 必须通过 function_call / tool_call 机制调用.
15
+ INSTRUCT_SP = """# You are a professional instruction expert.
16
+
17
+ ## Core Tasks
18
+ - Decide on preprocessing tools:
19
+ - User message contains a link: Call `crawl_page` to get content, no other tools needed.
20
+ - User message contains typical nouns or possible proper noun combinations: Call `internal_web_search`.
21
+ - Extract the keywords and search for them alone, without adding particles or filler words; this gives the best search results.
22
+ - If user message keywords are clear, and image search can find posters, landmarks, items, character drawings, etc., call `internal_image_search`.
23
+ - User message does not need search: Do not call tools.
24
+ - Call `set_mode`:
25
+ - Most routine questions: `standard`.
26
+ - User requests research / deep search: `agent`.
27
+ - Need to get specific page information to answer the question: `agent`.
28
+ > All tools need to be called simultaneously in this conversation.
29
+
30
+ ## Call Tools
31
+ - When using tools, you must call them via the `function_call` / `tool_call` mechanism.
32
32
  {tools_desc}
33
33
 
34
- ## 你的回复
35
- 调用工具后无需回复额外文本节省token.
34
+ ## Your Reply
35
+ Do not reply with extra text after calling tools to save tokens.
36
36
 
37
- ## 用户消息
37
+ ## User Message
38
38
  ```
39
39
  {user_msgs}
40
40
  ```
41
41
  """
42
42
 
43
-
44
- INTRUCT_SP_VISION_ADD = """
45
- ## 视觉专家消息
43
+ INSTRUCT_SP_VISION_ADD = """
44
+ ## Vision Expert Message
46
45
  ```text
47
46
  {vision_msgs}
48
47
  ```
49
48
  """
50
49
 
51
- AGENT_SP = """# 你是一个 Agent 总控专家, 你需要理解用户意图, 根据已有信息给出最终回复.
52
- > 请确保你输出的任何消息有着准确的来源, 减少输出错误信息.
53
-
54
- 当前模式: {mode}, {mode_desc}
55
-
56
-
57
-
58
- ## 过程要求
59
- 当不调用工具发送文本, 即会变成最终回复, 请遵守:
60
- - 直接给出一篇报告, 无需回答用户消息
61
- - 语言: 简体中文, 百科式风格, 语言严谨不啰嗦.
62
- - 正文格式:
63
- - 使用 Markdown 格式, 支持 hightlight, katex
64
- - 最开始给出`# `大标题, 不要有多余废话, 不要直接回答用户的提问.
65
- - 内容丰富突出重点.
66
- - 工具引用:
67
- > 重要: 所有正文内容必须基于实际信息, 保证百分百真实度
68
- - 引用规则:
69
- - 本次会话中存在对解决此问题有用的信息才加以引用, 不需要的消息可以不引用.
70
- - 角标必须真实对应上下文中获取的信息, 同时对应 references 中的内容, 图片按顺序对应.
71
- - 正文中的引用规则
72
- - 搜索摘要引用: 使用如 [search:3][search:4]
73
- - 页面内容引用: 使用如 [page:5][page:6]
74
- - 图片引用: 使用如 [image:7][image:8]
75
- - search 的意思是你使用 internal_web_search 获取的搜索摘要, 如果没有此工具相关信息则不引用
76
- - page 的意思是你使用 crawl_page 获取的页面内容, 如果没有此工具相关信息则不引用
77
- - image 的意思是你使用 internal_image_search 获取的图片, 图片按顺序摆放即可, 你无需显式引用
78
- - 在正文底部添加 references 代码块:
79
- - 用不到的条目不写, 没有专家给信息就不写.
80
- ```references
81
- [2] [search] [文本描述](url)
82
- [8] [search] [文本描述](url)
83
- [1] [page] [页面标题](url)
84
- [2] [page] [页面标题](url)
85
- [1] [image] [来源](url)
86
- ```
87
-
88
- ## 用户消息
50
+ AGENT_SP = """# You are an Agent Control Expert. You need to understand user intent and provide a final reply based on available information.
51
+ > Please ensure that any message you output has an accurate source to reduce misinformation.
52
+
53
+ Current Mode: {mode}, {mode_desc}
54
+
55
+ ## Process Requirements
56
+ When sending text without calling tools, it means this is the final reply. Please observe:
57
+ - Provide a report directly, no need to explicitly answer the user message.
58
+ - Language: {language}, encyclopedic style, rigorous and concise language.
59
+ - Body Format:
60
+ - Use Markdown format, supporting highlight, katex.
61
+ - Give a `# ` main title at the beginning, no extra nonsense, do not directly answer the user's question.
62
+ - Rich content highlighting key points.
63
+ - Citation:
64
+ > Important: All body content must be based on actual information, ensuring 100% accuracy.
65
+ - Information sources are numbered in order of acquisition as [1], [2], [3]...
66
+ - Use [1][2] format directly in body text to cite, only cite sources helpful to the answer
67
+ - No need to provide a reference list, the system will auto-generate it
68
+
69
+ ## User Message
89
70
  ```text
90
71
  {user_msgs}
91
72
  ```
92
73
  """
93
74
 
94
75
  AGENT_SP_TOOLS_STANDARD_ADD = """
95
- 你需要整合已有的信息, 提炼用户消息中的关键词, 进行最终回复.
76
+ You need to integrate existing information, extract keywords from the user message, and make a final reply.
96
77
  """
97
78
 
98
-
99
79
  AGENT_SP_TOOLS_AGENT_ADD = """
100
- - 你现在可以使用工具: {tools_desc}
101
- - 你需要判断顺序或并发使用工具获取信息:
102
- - 0-1 internal_web_search
103
- - 0-1 internal_image_search (如果用户需要图片, 通常和 internal_web_search 并发执行)
104
- - 1-2 crawl_page
105
- - 使用工具时, 必须通过 function_call / tool_call 机制调用.
80
+ - You can now use tools: {tools_desc}
81
+ - You need to judge whether to use tools sequentially or concurrently to obtain information:
82
+ - 0-1 times `internal_web_search`
83
+ - 0-1 times `internal_image_search` (if user needs images, usually concurrent with `internal_web_search`)
84
+ - 1-2 times `crawl_page`
85
+ - When using tools, you must call them via the `function_call` / `tool_call` mechanism.
106
86
  """
107
87
 
108
-
109
-
110
- AGENT_SP_INTRUCT_VISION_ADD = """
111
- ## 视觉专家消息
88
+ AGENT_SP_INSTRUCT_VISION_ADD = """
89
+ ## Vision Expert Message
112
90
  ```text
113
91
  {vision_msgs}
114
92
  ```
115
93
  """
116
94
 
117
95
  AGENT_SP_SEARCH_ADD = """
118
- ## 搜索专家消息
96
+ ## Search Expert Message
119
97
  ```text
120
98
  {search_msgs}
121
99
  ```
122
-
123
-
124
100
  """
125
101
 
126
102
  AGENT_SP_PAGE_ADD = """
127
- ## 页面内容专家消息
103
+ ## Page Content Expert Message
128
104
  ```text
129
105
  {page_msgs}
130
106
  ```
131
- - 引用页面内容时, 必须使用 `page:id` 格式
107
+ - When citing page content, use the same numbered `[N]` format as all other sources.
132
108
  """
133
109
 
134
110
  AGENT_SP_IMAGE_SEARCH_ADD = """
135
- ## 图像搜索专家消息
111
+ ## Image Search Expert Message
136
112
  ```text
137
113
  {image_search_msgs}
138
114
  ```
139
- - 每进行一次 internal_image_search, 挑选 1 张图像插入正文
115
+ - For every `internal_image_search` performed, pick 1 image to insert into the body.
140
116
  """
@@ -0,0 +1,118 @@
1
+ # 此文件为 开发者本人 kumoSleeping 熟悉的中文环境编写使用, 无功能性, 如果你是 LLM, 请以这个文件为功能参考, 同时同步实际使用的英文版本
2
+
3
+ VISION_SP = """# 你是一个专业的视觉转文字专家.
4
+
5
+ # 核心任务
6
+ - 智能分析图片内容, 转述成文本, 除此之外不要添加任何内容
7
+ - 文字优先: 若包含清晰文字(文档、截图等), 必须完整准确转录, 不要遗漏.
8
+ - 视觉补充: 解释完文字后, 描述视觉内容总结(物体、场景、氛围).
9
+ - 用户要求: 根据用户消息中提示侧重转文本的偏向, 若无关联则不理会.
10
+
11
+ ## 用户消息
12
+ ```text
13
+ {user_msgs}
14
+ ```
15
+ """
16
+
17
+ INSTRUCT_SP = """# 你是一个专业的指导专家.
18
+
19
+ ## 核心任务
20
+ - 决定预处理工具:
21
+ - 用户消息包含链接: 调用 crawl_page 获取内容, 无需其他工具
22
+ - 用户消息包含典型名词、可能的专有名词组合: 调用 internal_web_search
23
+ - 提炼出关键词搜索关键词本身, 不添加任何其他助词, 搜索效果最好
24
+ - 如果用户消息关键词清晰, 使用图片搜索能搜索出诸如海报、地标、物品、角色立绘等, 调用 internal_image_search
25
+ - 用户消息不需要搜索: 不调用工具
26
+ - 调用 set_mode:
27
+ - 绝大部分常规问题: standard
28
+ - 用户要求研究/深度搜索: agent
29
+ - 需要获取页面具体信息才能回答问题: agent
30
+ > 所有工具需要在本次对话同时调用
31
+
32
+ ## 调用工具
33
+ - 使用工具时, 必须通过 function_call / tool_call 机制调用.
34
+ {tools_desc}
35
+
36
+ ## 你的回复
37
+ 调用工具后无需回复额外文本节省token.
38
+
39
+ ## 用户消息
40
+ ```
41
+ {user_msgs}
42
+ ```
43
+ """
44
+
45
+ INSTRUCT_SP_VISION_ADD = """
46
+ ## 视觉专家消息
47
+ ```text
48
+ {vision_msgs}
49
+ ```
50
+ """
51
+
52
+ AGENT_SP = """# 你是一个 Agent 总控专家, 你需要理解用户意图, 根据已有信息给出最终回复.
53
+ > 请确保你输出的任何消息有着准确的来源, 减少输出错误信息.
54
+
55
+ 当前模式: {mode}, {mode_desc}
56
+
57
+ ## 过程要求
58
+ 当不调用工具发送文本, 即会变成最终回复, 请遵守:
59
+ - 直接给出一篇报告, 无需回答用户消息
60
+ - 语言: {language}, 百科式风格, 语言严谨不啰嗦.
61
+ - 正文格式:
62
+ - 使用 Markdown 格式, 支持 highlight, katex
63
+ - 最开始给出`# `大标题, 不要有多余废话, 不要直接回答用户的提问.
64
+ - 内容丰富突出重点.
65
+ - 引用:
66
+ > 重要: 所有正文内容必须基于实际信息, 保证百分百真实度
67
+ - 信息来源已按获取顺序编号为 [1], [2], [3]...
68
+ - 正文中直接使用 [1][2] 格式引用, 只引用对回答有帮助的来源
69
+ - 无需给出参考文献列表, 系统会自动生成
70
+
71
+ ## 用户消息
72
+ ```text
73
+ {user_msgs}
74
+ ```
75
+ """
76
+
77
+ AGENT_SP_TOOLS_STANDARD_ADD = """
78
+ 你需要整合已有的信息, 提炼用户消息中的关键词, 进行最终回复.
79
+ """
80
+
81
+ AGENT_SP_TOOLS_AGENT_ADD = """
82
+ - 你现在可以使用工具: {tools_desc}
83
+ - 你需要判断顺序或并发使用工具获取信息:
84
+ - 0-1 次 internal_web_search
85
+ - 0-1 次 internal_image_search (如果用户需要图片, 通常和 internal_web_search 并发执行)
86
+ - 1-2 次 crawl_page
87
+ - 使用工具时, 必须通过 function_call / tool_call 机制调用.
88
+ """
89
+
90
+ AGENT_SP_INSTRUCT_VISION_ADD = """
91
+ ## 视觉专家消息
92
+ ```text
93
+ {vision_msgs}
94
+ ```
95
+ """
96
+
97
+ AGENT_SP_SEARCH_ADD = """
98
+ ## 搜索专家消息
99
+ ```text
100
+ {search_msgs}
101
+ ```
102
+ """
103
+
104
+ AGENT_SP_PAGE_ADD = """
105
+ ## 页面内容专家消息
106
+ ```text
107
+ {page_msgs}
108
+ ```
109
+ - 引用页面内容时, 使用与其他来源相同的 `[N]` 编号格式
110
+ """
111
+
112
+ AGENT_SP_IMAGE_SEARCH_ADD = """
113
+ ## 图像搜索专家消息
114
+ ```text
115
+ {image_search_msgs}
116
+ ```
117
+ - 每进行一次 internal_image_search, 挑选 1 张图像插入正文
118
+ """
@@ -0,0 +1,113 @@
1
+ Metadata-Version: 2.4
2
+ Name: entari_plugin_hyw
3
+ Version: 3.4.0
4
+ Summary: Use large language models to interpret chat messages
5
+ Author-email: kumoSleeping <zjr2992@outlook.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/kumoSleeping/entari-plugin-hyw
8
+ Project-URL: Repository, https://github.com/kumoSleeping/entari-plugin-hyw
9
+ Project-URL: Issue Tracker, https://github.com/kumoSleeping/entari-plugin-hyw/issues
10
+ Keywords: entari,llm,ai,bot,chat
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Requires-Python: >=3.10
18
+ Description-Content-Type: text/markdown
19
+ Requires-Dist: arclet-entari[full]>=0.16.5
20
+ Requires-Dist: openai
21
+ Requires-Dist: httpx
22
+ Requires-Dist: markdown>=3.10
23
+ Requires-Dist: crawl4ai>=0.7.8
24
+ Requires-Dist: jinja2>=3.0
25
+ Requires-Dist: ddgs>=9.10.0
26
+ Provides-Extra: dev
27
+ Requires-Dist: entari-plugin-server>=0.5.0; extra == "dev"
28
+ Requires-Dist: satori-python-adapter-onebot11>=0.2.5; extra == "dev"
29
+
30
+ # Entari Plugin HYW
31
+
32
+ [![PyPI version](https://badge.fury.io/py/entari-plugin-hyw.svg)](https://badge.fury.io/py/entari-plugin-hyw)
33
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
34
+ [![Python Versions](https://img.shields.io/pypi/pyversions/entari-plugin-hyw.svg)](https://pypi.org/project/entari-plugin-hyw/)
35
+
36
+ **English** | [简体中文](docs/README_CN.md)
37
+
38
+ **Entari Plugin HYW** is an advanced agentic chat plugin for the [Entari](https://github.com/entari-org/entari) framework. It leverages Large Language Models (LLMs) to provide intelligent, context-aware, and multi-modal responses within instant messaging environments (OneBot 11, Satori).
39
+
40
+ The plugin implements a three-stage pipeline (**Vision**, **Instruct**, **Agent**) to autonomously decide when to search the web, crawl pages, or analyze images to answer user queries effectively.
41
+
42
+ <p align="center">
43
+ <img src="docs/demo_mockup.svg" width="800" />
44
+ </p>
45
+
46
+ ## Features
47
+
48
+ - 📖 **Agentic Workflow**
49
+ Autonomous decision-making process to search, browse, and reason.
50
+
51
+ - 🎑 **Multi-Modal Support**
52
+ Native support for image analysis using Vision Language Models (VLMs).
53
+
54
+ - 🔍 **Web Search & Crawling**
55
+ Integrated **DuckDuckGo** and **Crawl4AI** for real-time information retrieval.
56
+
57
+ - 🎨 **Rich Rendering**
58
+ Responses are rendered as images containing Markdown, syntax-highlighted code, LaTeX math, and citation badges.
59
+
60
+ - 🔌 **Protocol Support**
61
+ Deep integration with OneBot 11 and Satori protocols, handling reply context and JSON cards perfectly.
62
+
63
+ ## Installation
64
+
65
+ ```bash
66
+ pip install entari-plugin-hyw
67
+ ```
68
+
69
+ ## Configuration
70
+
71
+ Configure the plugin in your `entari.yml`.
72
+
73
+ ### Minimal Configuration
74
+
75
+ ```yaml
76
+ plugins:
77
+ entari_plugin_hyw:
78
+ model_name: google/gemini-3-flash-preview
79
+ api_key: "your-openrouter-api-key-here"
80
+ ```
81
+
82
+ ## Usage
83
+
84
+ ### Commands
85
+
86
+ - **Text Query**
87
+ ```text
88
+ /q What's the latest news on Rust 1.83?
89
+ ```
90
+
91
+ - **Image Analysis**
92
+ *(Send an image with command, or reply to an image)*
93
+ ```text
94
+ /q [Image] Explain this error.
95
+ ```
96
+ - **Quote Query**
97
+ ```text
98
+ [quote: User Message] /q
99
+ ```
100
+
101
+ - **Follow-up**
102
+ *Reply to the bot's message to continue the conversation.*
103
+
104
+ ## Documentation for AI/LLMs
105
+
106
+ - [Instruction Guide (English)](docs/README_LLM_EN.md)
107
+ - [指导手册 (简体中文)](docs/README_LLM_CN.md)
108
+
109
+ ---
110
+
111
+ ## License
112
+
113
+ This project is licensed under the MIT License.
@@ -1,4 +1,4 @@
1
- entari_plugin_hyw/__init__.py,sha256=K5WW4usKpP38CTxQHpm693brIhsbxBRsD0ojwekmMGE,19689
1
+ entari_plugin_hyw/__init__.py,sha256=BwlY42IJj51Oo8hHrTxbCDItRA3bMr7Z1G-3HUEyWhQ,19708
2
2
  entari_plugin_hyw/assets/package-lock.json,sha256=TIrLM-wLWZTrp3LKfzhEVuduhvBJmI93NdQEKYLW2W0,33172
3
3
  entari_plugin_hyw/assets/package.json,sha256=Y4H8JGtp3nv2WUtI20tXoXWddR-dwwKJhqQVLercpiw,306
4
4
  entari_plugin_hyw/assets/tailwind.config.js,sha256=S8I9X8hI8IaQRczWK9hTW-zl4oVpAXw5ykeksrzHjpU,382
@@ -11,6 +11,7 @@ entari_plugin_hyw/assets/icon/deepseek.png,sha256=KWWAr9aeYMc6I07U_1qo7zcXO6e7-k
11
11
  entari_plugin_hyw/assets/icon/gemini.svg,sha256=H74CoVmx5opcCtr3Ay3M09dpqL9cd9Whkx-M6an3t7s,599
12
12
  entari_plugin_hyw/assets/icon/google.svg,sha256=H74CoVmx5opcCtr3Ay3M09dpqL9cd9Whkx-M6an3t7s,599
13
13
  entari_plugin_hyw/assets/icon/grok.png,sha256=uSulvvDVqoA4RUOW0ZAkdvBVM2rpyGJRZIbn5dEFspw,362
14
+ entari_plugin_hyw/assets/icon/huggingface.png,sha256=8eAudeftUDO11jf0coOscPeRkskCb7l9TNMx78q61mY,24564
14
15
  entari_plugin_hyw/assets/icon/microsoft.svg,sha256=-am_6N3UEQYSzldDg-xrdGYjTWsagH-3v4Q_eia1ymE,684
15
16
  entari_plugin_hyw/assets/icon/minimax.png,sha256=tWqVlMdFNPpP8zWWX9tvIsWXI9q76P7O3t3CEZO7NU0,1525
16
17
  entari_plugin_hyw/assets/icon/mistral.png,sha256=0vv7jPmPKiBRYVYYJxVL_wIH_qa_ZssIdV3NDO5vbmk,869
@@ -20,6 +21,7 @@ entari_plugin_hyw/assets/icon/openrouter.png,sha256=exxfjWGDWpYH-Vc8xJDbhNVeXFEV
20
21
  entari_plugin_hyw/assets/icon/perplexity.svg,sha256=mHWZFoeWmDYXOIDzm9pj6_sRotaI8xNy5Lkeg5Vzu70,555
21
22
  entari_plugin_hyw/assets/icon/qwen.png,sha256=eqLbnIPbjh2_PsODU_mmqjeD82xXj8fV_kN0fDrNaD0,38419
22
23
  entari_plugin_hyw/assets/icon/xai.png,sha256=uSulvvDVqoA4RUOW0ZAkdvBVM2rpyGJRZIbn5dEFspw,362
24
+ entari_plugin_hyw/assets/icon/xiaomi.png,sha256=WHxlDFGU5FCjb-ure3ngdGG18-efYZUUfqA3_lqCUN0,4084
23
25
  entari_plugin_hyw/assets/icon/zai.png,sha256=K-gnabdsjMLInppHA1Op7Nyt33iegrx1x-yNlvCZ0Tc,2351
24
26
  entari_plugin_hyw/assets/libs/highlight.css,sha256=Oppd74ucMR5a5Dq96FxjEzGF7tTw2fZ_6ksAqDCM8GY,1309
25
27
  entari_plugin_hyw/assets/libs/highlight.js,sha256=g3pvpbDHNrUrveKythkPMF2j_J7UFoHbUyFQcFe1yEY,121727
@@ -28,18 +30,19 @@ entari_plugin_hyw/assets/libs/katex.css,sha256=UF1fgpAiu3tPJN_uCqEUHNe7pnr-QR0SQ
28
30
  entari_plugin_hyw/assets/libs/katex.js,sha256=3ISyluw-iE3gkxWPdg_Z1Ftser5YtTgVV_ThOPRqWK4,277038
29
31
  entari_plugin_hyw/assets/libs/tailwind.css,sha256=ee_3txpnxhChZOjSJQUX0XiL1Nq0U2KLTvSGJLZBlaA,19916
30
32
  entari_plugin_hyw/core/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
- entari_plugin_hyw/core/config.py,sha256=rN2hVI964D7eM6xPuOthEIpXyFGZpRSYNrl1xdKHJ1s,1636
33
+ entari_plugin_hyw/core/config.py,sha256=uKZOuK9bG1W-E5UzhKX-EcYu2nFdxfl9EEaSwVgFtK4,1653
32
34
  entari_plugin_hyw/core/history.py,sha256=vqp7itwR5-KaqC4Ftmq6GOz7OM9GsiFJnSN9JJ2P6L4,5894
33
35
  entari_plugin_hyw/core/hyw.py,sha256=RCRjV9uYmvXysiliztphLP3VyUabrf0LY2Bk66W5JGA,1927
34
- entari_plugin_hyw/core/pipeline.py,sha256=t1nIjQlMKqTb7wpY7Dn1rbWDD2kl8WWUnPQBY5MQO4E,49071
35
- entari_plugin_hyw/core/render.py,sha256=rUhv2R5fdtsMIGg-Q1qe8hhUWC1_E50BODLA78u4_SI,28948
36
+ entari_plugin_hyw/core/pipeline.py,sha256=5pU7K8q8jYMJVdudUtpz0Pq4bI1MUpBX0Jdrkc0vswE,47868
37
+ entari_plugin_hyw/core/render.py,sha256=3tgmB3Pntbcr4YcyvF8tzaihNdol9sitFkXPrgmQXVQ,29696
36
38
  entari_plugin_hyw/utils/__init__.py,sha256=TnkxDqYr0zgRE7TC92tVbUaY8m1UyyoLg2zvzQ8nMVI,84
37
39
  entari_plugin_hyw/utils/browser.py,sha256=LJlFh-oSqt9mQBpMALxbYGUG__t1YLUo7RxUAslsWUc,1416
38
40
  entari_plugin_hyw/utils/misc.py,sha256=_7iHVYj_mJ6OGq6FU1s_cFeS1Ao-neBjZYd6eI2p95U,3482
39
41
  entari_plugin_hyw/utils/playwright_tool.py,sha256=ZZNkzFtUt_Gxny3Od4boBAgNF9J0N84uySatzn1Bwe4,1272
40
- entari_plugin_hyw/utils/prompts.py,sha256=oJpgNvRQ_Lmr2Ca-B6fcpysMT2i0obioBC1DuH_Z1MY,4430
42
+ entari_plugin_hyw/utils/prompts.py,sha256=Jp94gc0BE6Kn-5N2soXDo66ySX7NxrczCcuS16nGaMU,4035
43
+ entari_plugin_hyw/utils/prompts_cn.py,sha256=DNSd4U6htUGcFXbywEX4d-WXnQL57CZsq-vba-T6hHw,3695
41
44
  entari_plugin_hyw/utils/search.py,sha256=Bvz2KFw3Gr2nuvmlo_8ExLHvO353NKX-YN35A2FCsBw,19047
42
- entari_plugin_hyw-3.3.7.dist-info/METADATA,sha256=ZQ_HfsEQKXUPkEf2KaPSO9hPm1bYgHSfCo-GRV-f9l8,6340
43
- entari_plugin_hyw-3.3.7.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
44
- entari_plugin_hyw-3.3.7.dist-info/top_level.txt,sha256=TIDsn6XPs6KA5e3ezsE65JoXsy03ejDdrB41I4SPjmo,18
45
- entari_plugin_hyw-3.3.7.dist-info/RECORD,,
45
+ entari_plugin_hyw-3.4.0.dist-info/METADATA,sha256=iLjguVLoNWNm3eMhE2cmGqDcZyW25wn3fUCHmU4idwE,3598
46
+ entari_plugin_hyw-3.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
47
+ entari_plugin_hyw-3.4.0.dist-info/top_level.txt,sha256=TIDsn6XPs6KA5e3ezsE65JoXsy03ejDdrB41I4SPjmo,18
48
+ entari_plugin_hyw-3.4.0.dist-info/RECORD,,
@@ -1,142 +0,0 @@
1
- Metadata-Version: 2.4
2
- Name: entari_plugin_hyw
3
- Version: 3.3.7
4
- Summary: Use large language models to interpret chat messages
5
- Author-email: kumoSleeping <zjr2992@outlook.com>
6
- License: MIT
7
- Project-URL: Homepage, https://github.com/kumoSleeping/entari-plugin-hyw
8
- Project-URL: Repository, https://github.com/kumoSleeping/entari-plugin-hyw
9
- Project-URL: Issue Tracker, https://github.com/kumoSleeping/entari-plugin-hyw/issues
10
- Keywords: entari,llm,ai,bot,chat
11
- Classifier: Development Status :: 3 - Alpha
12
- Classifier: Intended Audience :: Developers
13
- Classifier: License :: OSI Approved :: MIT License
14
- Classifier: Programming Language :: Python :: 3.10
15
- Classifier: Programming Language :: Python :: 3.11
16
- Classifier: Programming Language :: Python :: 3.12
17
- Requires-Python: >=3.10
18
- Description-Content-Type: text/markdown
19
- Requires-Dist: arclet-entari[full]>=0.16.5
20
- Requires-Dist: openai
21
- Requires-Dist: httpx
22
- Requires-Dist: markdown>=3.10
23
- Requires-Dist: crawl4ai>=0.7.8
24
- Requires-Dist: jinja2>=3.0
25
- Requires-Dist: ddgs>=9.10.0
26
- Provides-Extra: dev
27
- Requires-Dist: entari-plugin-server>=0.5.0; extra == "dev"
28
- Requires-Dist: satori-python-adapter-onebot11>=0.2.5; extra == "dev"
29
-
30
-
31
- # Entari Plugin HYW
32
-
33
-
34
- [![PyPI version](https://badge.fury.io/py/entari-plugin-hyw.svg)](https://badge.fury.io/py/entari-plugin-hyw)
35
- [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
36
- [![Python Versions](https://img.shields.io/pypi/pyversions/entari-plugin-hyw.svg)](https://pypi.org/project/entari-plugin-hyw/)
37
-
38
- **Entari Plugin HYW** is an advanced agentic chat plugin for the [Entari](https://github.com/entari-org/entari) framework. It leverages Large Language Models (LLMs) to provide intelligent, context-aware, and multi-modal responses within instant messaging environments (OneBot 11, Satori).
39
-
40
- **Entari Plugin HYW** 是 Entari 框架的高级智能体聊天插件。它利用大语言模型(LLM)在即时通讯环境(OneBot 11, Satori)中提供智能、上下文感知和多模态的回复体验。
41
-
42
- The plugin implements a three-stage pipeline (**Vision**, **Instruct**, **Agent**) to autonomously decide when to search the web, crawl pages, or analyze images to answer user queries effectively.
43
-
44
- 插件实现了三阶段流水线(**视觉**、**指令**、**代理**),能够自主决定何时搜索网络、抓取网页或分析图片,从而高效地回答用户问题。
45
-
46
- <img src="demo.jpg" width="300" />
47
-
48
- ## Features / 功能特性
49
-
50
- - 📖 **Agentic Workflow (智能工作流)**
51
- Autonomous decision-making process to search, browse, and reason.
52
- 具备自主决策能力,能够自动进行搜索、网页浏览和逻辑推理。
53
-
54
- - 🎑 **Multi-Modal Support (多模态支持)**
55
- Native support for image analysis using Vision Language Models (VLMs).
56
- 原生支持图片分析,利用视觉语言模型(VLM)理解图像内容。
57
-
58
- - 🔍 **Web Search & Crawling (搜索与抓取)**
59
- Integrated **DuckDuckGo** and **Crawl4AI** for real-time information retrieval.
60
- 集成 DuckDuckGo 搜索与 Crawl4AI 网页抓取,实时获取互联网信息。
61
-
62
- - 🎨 **Rich Rendering (富媒体渲染)**
63
- Responses are rendered as images containing Markdown, syntax-highlighted code, LaTeX math, and citation badges.
64
- 回答将渲染为包含 Markdown、代码高亮、LaTeX 公式及引用角标的精美图片。
65
-
66
- - 🔌 **Protocol Support (多协议适配)**
67
- Deep integration with OneBot 11 and Satori protocols.
68
- 深度适配 OneBot 11 和 Satori 协议,完美处理回复上下文与 JSON 卡片。
69
-
70
- ## Installation / 安装
71
-
72
- ```bash
73
- pip install entari-plugin-hyw
74
- ```
75
-
76
- ## Configuration / 配置
77
-
78
- Configure the plugin in your `entari.yml`.
79
- 在 `entari.yml` 中进行配置。
80
-
81
- ### Minimal Configuration / 最小配置
82
-
83
- ```yaml
84
- plugins:
85
- entari_plugin_hyw:
86
- # Trigger command / 触发指令
87
- question_command: ".q"
88
-
89
- # Main Model (Required) / 主模型(必需)
90
- model_name: "google/gemini-2.0-flash-exp"
91
- api_key: "your-api-key-here"
92
- base_url: "https://generativelanguage.googleapis.com/v1beta/openai/"
93
- ```
94
-
95
- ### Configuration Reference / 配置详解
96
-
97
- | Option (选项) | Type | Default | Description (说明) |
98
- | :--- | :--- | :--- | :--- |
99
- | **Basic** | | | |
100
- | `question_command` | `str` | `/q` | The command to trigger the bot. <br> 触发机器人的指令前缀。 |
101
- | `reaction` | `bool` | `true` | React with emoji on start(now only lagrange ob extension). <br> 收到指令时是否回应表情(目前只支持拉格兰ob扩展)。 |
102
- | `quote` | `bool` | `true` | Quote the user's message in reply. <br> 回复时是否引用原消息。 |
103
- | **Models** | | | |
104
- | `model_name` | `str` | *None* | **Required.** Main Agent model ID. <br> **必需。** 主代理模型 ID。 |
105
- | `api_key` | `str` | *None* | **Required.** API key. <br> **必需。** API 密钥。 |
106
- | `base_url` | `str` | `...` | OpenAI-compatible API base URL. <br> 兼容 OpenAI 的 API 地址。 |
107
- | `extra_body` | `dict` | `null` | Extra parameters (e.g. `reasoning_effort`). <br> 传递给 LLM 的额外参数。 |
108
- | **Specialized** | | | |
109
- | `vision_model_name`| `str` | *None* | Model for images. Defaults to `model_name`. <br> 处理图片的模型,默认同主模型。 |
110
- | `intruct_model_name`| `str` | *None* | Model for intent. Defaults to `model_name`. <br> 意图识别模型,默认同主模型。 |
111
- | **Tools** | | | |
112
- | `search_provider` | `str` | `ddgs`| `ddgs` (DuckDuckGo), `crawl4ai`, `httpx`. <br> 搜索后端提供商。 |
113
- | `search_limit` | `int` | `8` | Max search results. <br> 搜索结果数量限制。 |
114
- | `headless` | `bool` | `true` | Browser headless mode. <br> 浏览器无头模式。 |
115
-
116
- ## Usage / 使用方法
117
-
118
- ### Commands / 指令
119
-
120
- - **Text Query (文本问答)**
121
- ```text
122
- .q What's the latest news on Rust 1.83?
123
- .q Rust 1.83 有什么新特性?
124
- ```
125
-
126
- - **Image Analysis (图片分析)**
127
- *(Send an image with command, or reply to an image)*
128
- *(发送带图片的指令,或回复一张图片)*
129
- ```text
130
- .q [Image] Explain this error.
131
- .q [图片] 解释一下这个报错。
132
- ```
133
-
134
- - **Follow-up (追问)**
135
- *Reply to the bot's message to continue the conversation.*
136
- *直接回复机器人的消息即可进行连续对话。*
137
-
138
- -----
139
-
140
- ## License
141
-
142
- This project is licensed under the MIT License.