entari-plugin-hyw 3.3.4__py3-none-any.whl → 3.3.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -16,6 +16,40 @@
  <script>{{ katex_auto_render_js | safe }}</script>
  <!-- @formatter:on -->

+ <style>
+ /* Fallback style for broken images in markdown content */
+ .img-error-fallback {
+ display: flex;
+ align-items: center;
+ justify-content: center;
+ gap: 8px;
+ width: 100%;
+ aspect-ratio: 16 / 9;
+ margin-bottom: 8px;
+ background: linear-gradient(135deg, #d3e4fd 0%, #b7d3fe 50%, #8bb9fc 100%);
+ border-radius: 12px;
+ color: white;
+ font-size: 14px;
+ font-weight: 500;
+ box-shadow: 0 4px 12px rgba(59, 130, 246, 0.25);
+ }
+ .img-error-fallback i {
+ font-size: 20px;
+ }
+ /* Dynamic image sizing based on aspect ratio */
+ #markdown-content img {
+ border-radius: 8px;
+ margin-bottom: 8px;
+ }
+ #markdown-content img.img-horizontal {
+ width: 100%;
+ height: auto;
+ }
+ #markdown-content img.img-vertical {
+ width: 60%;
+ height: auto;
+ }
+ </style>
  </head>

  <body class="bg-[#f2f2f2] p-0 box-border m-0 font-sans text-gray-800">
@@ -135,7 +169,7 @@
  {{ list_card(stage.icon_html, title_html, subtitle_html=stats_html, is_compact=True, icon_box_class=icon_box_class) }}

  {# Nested Children (Indent & Connect) #}
- {% if stage.references or stage.flow_steps or stage.crawled_pages %}
+ {% if stage.references or stage.image_references or stage.flow_steps or stage.crawled_pages %}
  <div class="ml-4 pl-4 border-l-2 border-gray-200 mt-2 flex flex-col gap-2">

  {# References #}
@@ -158,18 +192,23 @@
  {% endfor %}
  {% endif %}

- {# Flow Steps #}
- {% if stage.flow_steps %}
- <div class="text-[12px] uppercase font-bold text-orange-600 tracking-wider mb-1 mt-1">Flow</div>
- {% for step in stage.flow_steps %}
- {% set icon_box_class = "rounded-md border border-gray-100 bg-white text-gray-500 shrink-0" %}
+ {# Image References #}
+ {% if stage.image_references %}
+ <div class="text-[12px] uppercase font-bold text-blue-600 tracking-wider mb-1 mt-2">Images</div>
+ {% for img in stage.image_references %}
+ {% set favicon_url = "https://www.google.com/s2/favicons?domain=" + img.domain + "&sz=32" %}

- {% set title_html = '<div class="text-[13px] font-semibold text-gray-900 underline decoration-gray-300 decoration-1 underline-offset-2 truncate">' + step.description + '</div>' %}
- {% set subtitle_html = '<div class="text-[12px] text-gray-700 leading-tight truncate">' + step.description + '</div>' %}
- {% set right_html = '<div class="flex items-center justify-center min-w-[16px] h-4 px-0.5 text-[10px] font-bold text-orange-700 bg-orange-50 border border-orange-200 rounded">' + ('abcdefghijklmnopqrstuvwxyz'[loop.index0]) + '</div>' %}
+ {% set img_icon %}
+ <img src="{{ favicon_url }}" class="w-3.5 h-3.5 rounded-sm opacity-80">
+ {% endset %}

- {{ list_card(step.icon_svg, title_html, subtitle_html=subtitle_html, right_content_html=right_html, is_compact=True, icon_box_class=icon_box_class) }}
- {% endfor %}
+ {% set img_icon_box = "bg-white rounded border border-gray-100 w-6 h-6 shrink-0" %}
+
+ {% set title_html = '<div class="text-[13px] font-medium text-gray-900 truncate">' + img.title + '</div>' %}
+ {% set subtitle_html = '<div class="text-[12px] text-gray-500 truncate">' + img.domain + '</div>' %}
+
+ {{ list_card(img_icon, title_html, subtitle_html=subtitle_html, link_url=img.url, is_compact=True, icon_box_class=img_icon_box) }}
+ {% endfor %}
  {% endif %}

  {# Crawled Pages #}
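The new Images section reads `stage.image_references`, whose entries are assembled later in this diff by `_parse_tagged_response` (title, url, domain, plus a thumbnail field). A rough sketch of the shape the template block above expects, with illustrative values only:

```python
# Illustrative only: the dict shape _parse_tagged_response builds for each image
# reference; the template block reads it as img.title / img.domain / img.url.
image_reference = {
    "title": "Example chart",
    "url": "https://example.com/articles/chart",
    "domain": "example.com",
    "thumbnail": "https://example.com/thumbs/chart.png",
}
# Favicon URL built the same way as the favicon_url set-tag above.
favicon_url = "https://www.google.com/s2/favicons?domain=" + image_reference["domain"] + "&sz=32"
```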
@@ -272,23 +311,46 @@
  const fragment = document.createDocumentFragment();
  let lastIndex = 0;
  const text = textNode.nodeValue;
- const regex = /`?(search|page):(\d+)`?/gi;
+ // Regex to capture:
+ // 1. Optional brackets/parens: [(
+ // 2. Type: search/page
+ // 3. IDs: 1 or 1,2,3
+ // 4. Closing: )]
+ const regex = /[\[\(]?(search|page):\s*([\d,\s]+)[\]\)]?/gi;
  let match;

  while ((match = regex.exec(text)) !== null) {
+ // Validate match: simple check to ensure it contains digits
+ if (!/\d/.test(match[2])) continue;
+
  fragment.appendChild(document.createTextNode(text.substring(lastIndex, match.index)));

+ const fullMatch = match[0];
  const type = match[1].toLowerCase();
- const id = match[2];
+ const idString = match[2];

- const span = document.createElement("span");
- const isPage = type === "page";
- const colorClass = isPage
- ? "text-orange-600 bg-orange-50 border-orange-200"
- : "text-blue-600 bg-blue-50 border-blue-200";
+ // Parse IDs (split by comma or space)
+ const ids = idString.split(/[,\s]+/).filter(s => s.trim().length > 0);

- span.innerHTML = `<span class="inline-flex items-center justify-center min-w-[14px] h-4 px-0.5 text-[9px] font-bold ${colorClass} border rounded align-top -top-0.5 relative mx-0.5 cursor-default" title="${type}:${id}">${id}</span>`;
- fragment.appendChild(span.firstElementChild);
+ // Check for standard format (allow plain or [brackets])
+ // Standard: search:1, [search:1], page:1, [page:1]
+ // Non-standard: (page:1), page:1,2, (page:1,2)
+ const isStandard = /^[\[]?(search|page):\d+[\]]?$/i.test(fullMatch);
+
+ if (!isStandard) {
+ console.warn(`[Template] Detected non-standard citation format: "${fullMatch}". Rendered as: ${type}:${ids.join(',')}`);
+ }
+
+ ids.forEach(id => {
+ const span = document.createElement("span");
+ const isPage = type === "page";
+ const colorClass = isPage
+ ? "text-orange-600 bg-orange-50 border-orange-200"
+ : "text-blue-600 bg-blue-50 border-blue-200";
+
+ span.innerHTML = `<span class="inline-flex items-center justify-center min-w-[14px] h-4 px-0.5 text-[9px] font-bold ${colorClass} border rounded align-top -top-0.5 relative mx-0.5 cursor-default" title="${type}:${id}">${id}</span>`;
+ fragment.appendChild(span.firstElementChild);
+ });

  lastIndex = regex.lastIndex;
  }
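The widened regex above is deliberately more permissive than the previous pattern, which only accepted a single numeric ID optionally wrapped in backticks: it now tolerates wrapping brackets or parentheses, whitespace after the colon, and comma- or space-separated ID lists, then renders one badge per ID and logs a warning for non-standard spellings. A minimal standalone sketch of the same pattern using Python's `re` (Python chosen to match the rest of the package; `extract_citations` is a hypothetical helper, not part of the plugin):

```python
import re

# Mirrors the JS regex added in the hunk above (illustration only).
CITATION = re.compile(r'[\[\(]?(search|page):\s*([\d,\s]+)[\]\)]?', re.IGNORECASE)

def extract_citations(text: str):
    found = []
    for m in CITATION.finditer(text):
        # Split the ID group on commas/whitespace, dropping empty pieces.
        ids = [s for s in re.split(r'[,\s]+', m.group(2)) if s]
        found.append((m.group(1).lower(), ids))
    return found

sample = "Prices rose in 2024 [search:3], see details (page:1,2) and page: 4."
print(extract_citations(sample))
# [('search', ['3']), ('page', ['1', '2']), ('page', ['4'])]
```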
@@ -301,6 +363,37 @@
  }

  processCitations(contentDiv);
+
+ // Handle broken images in markdown content
+ const contentImages = contentDiv.querySelectorAll('img');
+ contentImages.forEach(img => {
+ // Apply sizing class based on aspect ratio
+ const applySizeClass = function() {
+ if (this.naturalWidth >= this.naturalHeight) {
+ this.classList.add('img-horizontal');
+ } else {
+ this.classList.add('img-vertical');
+ }
+ };
+
+ img.onerror = function() {
+ const fallback = document.createElement('span');
+ fallback.className = 'img-error-fallback';
+ fallback.innerHTML = `<span style="font-size: 18px;">(。•́︿•̀。)</span><span>渲染失败</span>`;
+ this.parentNode.replaceChild(fallback, this);
+ };
+
+ // Check if image already loaded
+ if (img.complete) {
+ if (img.naturalHeight === 0) {
+ img.onerror();
+ } else {
+ applySizeClass.call(img);
+ }
+ } else {
+ img.onload = applySizeClass;
+ }
+ });
  });
  </script>
  </body>
@@ -18,6 +18,7 @@ class HYWConfig:
  search_base_url: str = "https://lite.duckduckgo.com/lite/?q={query}"
  image_search_base_url: str = "https://duckduckgo.com/?q={query}&iax=images&ia=images"
  search_params: Optional[str] = None # e.g. "&kl=cn-zh" for China region
+ search_limit: int = 8
  extra_body: Optional[Dict[str, Any]] = None
  temperature: float = 0.4
  max_turns: int = 10
@@ -34,3 +35,4 @@ class HYWConfig:
  vision_output_price: Optional[float] = None
  intruct_input_price: Optional[float] = None
  intruct_output_price: Optional[float] = None
+
@@ -39,6 +39,10 @@ class ProcessingPipeline:
  self.client = AsyncOpenAI(base_url=self.config.base_url, api_key=self.config.api_key)
  self.all_web_results = [] # Cache for search results
  self.current_mode = "standard" # standard | agent
+ # Independent ID counters for each type
+ self.search_id_counter = 0
+ self.page_id_counter = 0
+ self.image_id_counter = 0

  self.web_search_tool = {
  "type": "function",
@@ -118,8 +122,11 @@
  final_response_content = ""
  structured: Dict[str, Any] = {}

- # Reset search cache for this execution
+ # Reset search cache and ID counters for this execution
  self.all_web_results = []
+ self.search_id_counter = 0
+ self.page_id_counter = 0
+ self.image_id_counter = 0

  try:
  logger.info(f"Pipeline: Starting workflow for '{user_input}' using {active_model}")
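The per-type counters replace the earlier scheme, visible in removed lines further down, where every cached result drew its `_id` from a single `max()` over `all_web_results`; now `search:1`, `page:1`, and `image:1` can coexist because each type numbers independently and results are disambiguated by `_type`. A standalone toy sketch of the idea (the `assign_id` helper is hypothetical, not part of the plugin):

```python
from itertools import count

# One counter per result type, so IDs restart at 1 for each of
# "search", "page", and "image" (illustration only, not package code).
counters = {"search": count(1), "page": count(1), "image": count(1)}

def assign_id(item: dict, result_type: str) -> dict:
    item["_id"] = next(counters[result_type])
    item["_type"] = result_type
    return item

results = [
    assign_id({"title": "A"}, "search"),
    assign_id({"title": "B"}, "search"),
    assign_id({"title": "C"}, "page"),
    assign_id({"title": "D"}, "image"),
]
print([(r["_type"], r["_id"]) for r in results])
# [('search', 1), ('search', 2), ('page', 1), ('image', 1)]
```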
@@ -244,8 +251,8 @@
  search_msgs_text = self._format_search_msgs()
  image_msgs_text = self._format_image_search_msgs()

- has_search_results = any(not r.get("is_image") for r in self.all_web_results)
- has_image_results = any(r.get("is_image") for r in self.all_web_results)
+ has_search_results = any(r.get("_type") == "search" for r in self.all_web_results)
+ has_image_results = any(r.get("_type") == "image" for r in self.all_web_results)

  # Build agent system prompt
  agent_prompt_tpl = getattr(self.config, "agent_system_prompt", None) or AGENT_SP
@@ -462,7 +469,7 @@
  for tc in crawl_calls:
  url = tc.get("arguments", {}).get("url", "")
  # Try to find cached result
- found = next((r for r in self.all_web_results if r.get("url") == url and r.get("is_crawled")), None)
+ found = next((r for r in self.all_web_results if r.get("url") == url and r.get("_type") == "page"), None)
  if found:
  try:
  from urllib.parse import urlparse
@@ -588,6 +595,19 @@
  last_agent["time"] = a.get("time", 0)
  last_agent["cost"] = a.get("cost", 0.0)

+ # Clean up conversation history: Remove tool calls and results to save tokens and avoid ID conflicts
+ # Keep only 'user' messages and 'assistant' messages without tool_calls (final answers)
+ cleaned_history = []
+ for msg in current_history:
+ if msg.get("role") == "tool":
+ continue
+ if msg.get("role") == "assistant" and msg.get("tool_calls"):
+ continue
+ cleaned_history.append(msg)
+
+ # Update the reference (since it might be used by caller)
+ current_history[:] = cleaned_history
+
  return {
  "llm_response": final_content,
  "structured_response": structured,
@@ -609,8 +629,8 @@
  }

  def _parse_tagged_response(self, text: str) -> Dict[str, Any]:
- """Parse response for references and page references."""
- parsed = {"response": "", "references": [], "page_references": [], "flow_steps": []}
+ """Parse response for references and page references reordered by appearance."""
+ parsed = {"response": "", "references": [], "page_references": [], "image_references": [], "flow_steps": []}
  if not text:
  return parsed

@@ -620,7 +640,6 @@

  # 1. Try to unwrap JSON if the model acted like a ReAct agent
  try:
- # Check if it looks like JSON first to avoid performance hit
  if remaining_text.strip().startswith("{") and "action" in remaining_text:
  data = json.loads(remaining_text)
  if isinstance(data, dict) and "action_input" in data:
@@ -628,86 +647,80 @@
  except Exception:
  pass

- id_map = {} # Map original search ID (str) -> new index (int)
- page_id_map = {} # Map original page ID (str) -> new index (int)
-
- # Parse References Block (unified: contains both [search] and [page] entries)
+ # 2. Remove the original references block if present (we will rebuild it)
  ref_block_match = re.search(r'```references\s*(.*?)\s*```', remaining_text, re.DOTALL | re.IGNORECASE)
  if ref_block_match:
- ref_content = ref_block_match.group(1).strip()
- for line in ref_content.split("\n"):
- line = line.strip()
- if not line: continue
-
- # Match [id] [type] [title](url)
- # e.g. [1] [search] [文本描述](url) or [5] [page] [页面标题](url)
- id_match = re.match(r"^\[(\d+)\]", line)
- type_match = re.search(r"\[(search|page)\]", line, re.IGNORECASE)
- link_match = re.search(r"\[([^\[\]]+)\]\(([^)]+)\)", line)
-
- idx = None
- if id_match:
- try:
- idx = int(id_match.group(1))
- except ValueError:
- pass
-
- ref_type = "search" # default
- if type_match:
- ref_type = type_match.group(1).lower()
-
- entry = None
- if idx is not None and self.all_web_results:
- # For page type, only match crawled items
- if ref_type == "page":
- found = next((r for r in self.all_web_results if r.get("_id") == idx and r.get("is_crawled")), None)
- else:
- found = next((r for r in self.all_web_results if r.get("_id") == idx and not r.get("is_crawled")), None)
-
- if found:
- entry = {
- "title": found.get("title"),
- "url": found.get("url"),
- "domain": found.get("domain", "")
- }
-
- if not entry and link_match:
- entry = {"title": link_match.group(1), "url": link_match.group(2)}
-
- if entry:
- if ref_type == "page":
- parsed["page_references"].append(entry)
- if idx is not None:
- page_id_map[str(idx)] = len(parsed["page_references"])
- else:
- parsed["references"].append(entry)
- if idx is not None:
- id_map[str(idx)] = len(parsed["references"])
-
  remaining_text = remaining_text.replace(ref_block_match.group(0), "").strip()

- # Replace search:id citations
- if id_map:
- def replace_search_citation(match):
- old_id = match.group(1) or match.group(2)
- if old_id in id_map:
- return f"`search:{id_map[old_id]}`"
- return match.group(0)
-
- remaining_text = re.sub(r'\[(\d+)\]', replace_search_citation, remaining_text)
- remaining_text = re.sub(r'(?<!`)search:(\d+)(?!`)', replace_search_citation, remaining_text)
- remaining_text = re.sub(r'`search:(\d+)`', replace_search_citation, remaining_text)
-
- # Replace page:id citations
- if page_id_map:
- def replace_page_citation(match):
- old_id = match.group(1)
- if old_id in page_id_map:
- return f"`page:{page_id_map[old_id]}`"
- return match.group(0)
-
- remaining_text = re.sub(r'(?<!`)page:(\d+)(?!`)', replace_page_citation, remaining_text)
- remaining_text = re.sub(r'`page:(\d+)`', replace_page_citation, remaining_text)
+ # 3. Scan text for [type:id] tags and rebuild references in order of appearance
+ # Pattern matches [search:123], [page:123], [image:123]
+ pattern = re.compile(r'\[(search|page|image):(\d+)\]', re.IGNORECASE)
+
+ matches = list(pattern.finditer(remaining_text))
+
+ search_map = {} # old_id_str -> new_id (int)
+ page_map = {}
+ image_map = {}
+
+ for m in matches:
+ tag_type = m.group(1).lower()
+ old_id_str = m.group(2)
+ try:
+ old_id = int(old_id_str)
+ except ValueError:
+ continue
+
+ # Check if we already processed this ID for this type
+ if tag_type == "search" and old_id_str in search_map: continue
+ if tag_type == "page" and old_id_str in page_map: continue
+ if tag_type == "image" and old_id_str in image_map: continue
+
+ # Find in all_web_results
+ result_item = next((r for r in self.all_web_results if r.get("_id") == old_id and r.get("_type") == tag_type), None)
+
+ if not result_item:
+ continue
+
+ entry = {
+ "title": result_item.get("title", ""),
+ "url": result_item.get("url", ""),
+ "domain": result_item.get("domain", "")
+ }
+ if tag_type == "image":
+ entry["thumbnail"] = result_item.get("thumbnail", "")
+
+ # Add to respective list and map
+ if tag_type == "search":
+ parsed["references"].append(entry)
+ search_map[old_id_str] = len(parsed["references"])
+ elif tag_type == "page":
+ parsed["page_references"].append(entry)
+ page_map[old_id_str] = len(parsed["page_references"])
+ elif tag_type == "image":
+ parsed["image_references"].append(entry)
+ image_map[old_id_str] = len(parsed["image_references"])
+
+ # 4. Replace tags in text with new sequential IDs
+ def replace_tag(match):
+ tag_type = match.group(1).lower()
+ old_id = match.group(2)
+
+ new_id = None
+ if tag_type == "search":
+ new_id = search_map.get(old_id)
+ elif tag_type == "page":
+ new_id = page_map.get(old_id)
+ elif tag_type == "image":
+ new_id = image_map.get(old_id)
+
+ if new_id is not None:
+ if tag_type == "image":
+ return ""
+ return f"[{tag_type}:{new_id}]"
+
+ return match.group(0)
+
+ remaining_text = pattern.sub(replace_tag, remaining_text)

  parsed["response"] = remaining_text.strip()
  return parsed
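The effect of the rewritten parser is easiest to see end to end: the model's text carries `[type:id]` tags pointing at cached results by their per-type IDs; references are rebuilt in order of first appearance, citations are renumbered to match, and image tags are stripped from the prose since images are rendered from `image_references` instead. A condensed standalone sketch of that renumbering, with toy data rather than the pipeline itself:

```python
import re

# Toy cache mimicking self.all_web_results with per-type IDs (illustration only).
all_web_results = [
    {"_type": "search", "_id": 1, "title": "Rust 1.80 notes", "url": "https://example.com/a", "domain": "example.com"},
    {"_type": "search", "_id": 2, "title": "Release blog", "url": "https://example.com/b", "domain": "example.com"},
    {"_type": "page", "_id": 1, "title": "Changelog", "url": "https://example.com/c", "domain": "example.com"},
    {"_type": "image", "_id": 1, "title": "Logo", "url": "https://example.com/logo.png", "domain": "example.com", "thumbnail": ""},
]

text = "The release [search:2] added new lints [page:1][image:1], see also [search:2] and [search:1]."

pattern = re.compile(r'\[(search|page|image):(\d+)\]', re.IGNORECASE)
maps = {"search": {}, "page": {}, "image": {}}
refs = {"search": [], "page": [], "image": []}

# First pass: collect cited results in order of first appearance.
for m in pattern.finditer(text):
    t, old = m.group(1).lower(), m.group(2)
    if old in maps[t]:
        continue
    item = next((r for r in all_web_results if r["_type"] == t and r["_id"] == int(old)), None)
    if item:
        refs[t].append(item)
        maps[t][old] = len(refs[t])  # new ID = position in order of appearance

# Second pass: rewrite tags with the new IDs, dropping image tags from the prose.
def renumber(m):
    t, old = m.group(1).lower(), m.group(2)
    new = maps[t].get(old)
    if new is None:
        return m.group(0)
    return "" if t == "image" else f"[{t}:{new}]"

print(pattern.sub(renumber, text))
# The release [search:1] added new lints [page:1], see also [search:1] and [search:2].
print([r["title"] for r in refs["search"]])  # ['Release blog', 'Rust 1.80 notes']
```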
@@ -730,12 +743,11 @@
  query = args.get("query")
  web = await self.search_service.search(query)

- # Cache results and assign IDs
- current_max_id = max([item.get("_id", 0) for item in self.all_web_results], default=0)
-
+ # Cache results and assign search-specific IDs
  for item in web:
- current_max_id += 1
- item["_id"] = current_max_id
+ self.search_id_counter += 1
+ item["_id"] = self.search_id_counter
+ item["_type"] = "search"
  item["query"] = query
  self.all_web_results.append(item)

@@ -745,10 +757,11 @@
  query = args.get("query")
  images = await self.search_service.image_search(query)

- current_max_id = max([item.get("_id", 0) for item in self.all_web_results], default=0)
+ # Cache results and assign image-specific IDs
  for item in images:
- current_max_id += 1
- item["_id"] = current_max_id
+ self.image_id_counter += 1
+ item["_id"] = self.image_id_counter
+ item["_type"] = "image"
  item["query"] = query
  item["is_image"] = True
  self.all_web_results.append(item)
@@ -761,15 +774,15 @@
  # Returns Dict: {content, title, url}
  result_dict = await self.search_service.fetch_page(url)

- # Cache the crawled content so Agent can access it
- current_max_id = max([item.get("_id", 0) for item in self.all_web_results], default=0)
- current_max_id += 1
+ # Cache the crawled content with page-specific ID
+ self.page_id_counter += 1

  cached_item = {
- "_id": current_max_id,
+ "_id": self.page_id_counter,
+ "_type": "page",
  "title": result_dict.get("title", "Page"),
  "url": result_dict.get("url", url),
- "content": result_dict.get("content", "")[:2000], # Clip content for prompt
+ "content": result_dict.get("content", ""),
  "domain": "",
  "is_crawled": True,
  }
@@ -940,18 +953,13 @@
  if not self.all_web_results:
  return ""

- def clip(s: str, n: int) -> str:
- s = (s or "").strip()
- return s if len(s) <= n else s[: n - 1] + "…"
-
  lines = []
  for res in self.all_web_results:
- if res.get("is_image"): continue # Skip images
- if res.get("is_crawled"): continue # Skip crawled pages (handled separately)
+ if res.get("_type") != "search": continue # Only search results
  idx = res.get("_id")
- title = clip(res.get("title", ""), 80)
+ title = (res.get("title", "") or "").strip()
  url = res.get("url", "")
- content = clip(res.get("content", ""), 200)
+ content = (res.get("content", "") or "").strip()
  lines.append(f"[{idx}] Title: {title}\nURL: {url}\nSnippet: {content}\n")

  return "\n".join(lines)
@@ -961,17 +969,13 @@
  if not self.all_web_results:
  return ""

- def clip(s: str, n: int) -> str:
- s = (s or "").strip()
- return s if len(s) <= n else s[: n - 1] + "…"
-
  lines = []
  for res in self.all_web_results:
- if not res.get("is_crawled"): continue # Only crawled pages
+ if res.get("_type") != "page": continue # Only page results
  idx = res.get("_id")
- title = clip(res.get("title", ""), 80)
+ title = (res.get("title", "") or "").strip()
  url = res.get("url", "")
- content = clip(res.get("content", ""), 1500) # More content for pages
+ content = (res.get("content", "") or "").strip()
  lines.append(f"[{idx}] Title: {title}\nURL: {url}\nContent: {content}\n")

  return "\n".join(lines)
@@ -982,7 +986,7 @@

  lines = []
  for res in self.all_web_results:
- if not res.get("is_image"): continue
+ if res.get("_type") != "image": continue # Only image results
  idx = res.get("_id")
  title = res.get("title", "")
  url = res.get("image", "") or res.get("url", "")
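After these changes the `_format_*_msgs` helpers emit each cached result in full, keyed by its per-type `_id`, which is the same number the model is expected to cite back and which `_parse_tagged_response` later renumbers. For reference, a search entry comes out roughly like this (made-up data, outside the package):

```python
# Standalone sketch of the prompt block built by _format_search_msgs after the
# change: full title and snippet, no clip() truncation (illustrative data only).
res = {
    "_type": "search",
    "_id": 3,
    "title": "DuckDuckGo Lite",
    "url": "https://lite.duckduckgo.com/lite/",
    "content": "A lightweight HTML endpoint for DuckDuckGo search results.",
}
print(f"[{res['_id']}] Title: {res['title']}\nURL: {res['url']}\nSnippet: {res['content']}\n")
# [3] Title: DuckDuckGo Lite
# URL: https://lite.duckduckgo.com/lite/
# Snippet: A lightweight HTML endpoint for DuckDuckGo search results.
```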