entari-plugin-hyw 3.3.5__py3-none-any.whl → 3.3.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -16,6 +16,40 @@
     <script>{{ katex_auto_render_js | safe }}</script>
     <!-- @formatter:on -->

+    <style>
+        /* Fallback style for broken images in markdown content */
+        .img-error-fallback {
+            display: flex;
+            align-items: center;
+            justify-content: center;
+            gap: 8px;
+            width: 100%;
+            aspect-ratio: 16 / 9;
+            margin-bottom: 8px;
+            background: linear-gradient(135deg, #d3e4fd 0%, #b7d3fe 50%, #8bb9fc 100%);
+            border-radius: 12px;
+            color: white;
+            font-size: 14px;
+            font-weight: 500;
+            box-shadow: 0 4px 12px rgba(59, 130, 246, 0.25);
+        }
+        .img-error-fallback i {
+            font-size: 20px;
+        }
+        /* Dynamic image sizing based on aspect ratio */
+        #markdown-content img {
+            border-radius: 8px;
+            margin-bottom: 8px;
+        }
+        #markdown-content img.img-horizontal {
+            width: 100%;
+            height: auto;
+        }
+        #markdown-content img.img-vertical {
+            width: 60%;
+            height: auto;
+        }
+    </style>
 </head>

 <body class="bg-[#f2f2f2] p-0 box-border m-0 font-sans text-gray-800">
@@ -135,7 +169,7 @@
     {{ list_card(stage.icon_html, title_html, subtitle_html=stats_html, is_compact=True, icon_box_class=icon_box_class) }}

     {# Nested Children (Indent & Connect) #}
-    {% if stage.references or stage.flow_steps or stage.crawled_pages %}
+    {% if stage.references or stage.image_references or stage.flow_steps or stage.crawled_pages %}
     <div class="ml-4 pl-4 border-l-2 border-gray-200 mt-2 flex flex-col gap-2">

     {# References #}
@@ -158,18 +192,23 @@
     {% endfor %}
     {% endif %}

-    {# Flow Steps #}
-    {% if stage.flow_steps %}
-    <div class="text-[12px] uppercase font-bold text-orange-600 tracking-wider mb-1 mt-1">Flow</div>
-    {% for step in stage.flow_steps %}
-    {% set icon_box_class = "rounded-md border border-gray-100 bg-white text-gray-500 shrink-0" %}
+    {# Image References #}
+    {% if stage.image_references %}
+    <div class="text-[12px] uppercase font-bold text-blue-600 tracking-wider mb-1 mt-2">Images</div>
+    {% for img in stage.image_references %}
+    {% set favicon_url = "https://www.google.com/s2/favicons?domain=" + img.domain + "&sz=32" %}

-    {% set title_html = '<div class="text-[13px] font-semibold text-gray-900 underline decoration-gray-300 decoration-1 underline-offset-2 truncate">' + step.description + '</div>' %}
-    {% set subtitle_html = '<div class="text-[12px] text-gray-700 leading-tight truncate">' + step.description + '</div>' %}
-    {% set right_html = '<div class="flex items-center justify-center min-w-[16px] h-4 px-0.5 text-[10px] font-bold text-orange-700 bg-orange-50 border border-orange-200 rounded">' + ('abcdefghijklmnopqrstuvwxyz'[loop.index0]) + '</div>' %}
+    {% set img_icon %}
+    <img src="{{ favicon_url }}" class="w-3.5 h-3.5 rounded-sm opacity-80">
+    {% endset %}

-    {{ list_card(step.icon_svg, title_html, subtitle_html=subtitle_html, right_content_html=right_html, is_compact=True, icon_box_class=icon_box_class) }}
-    {% endfor %}
+    {% set img_icon_box = "bg-white rounded border border-gray-100 w-6 h-6 shrink-0" %}
+
+    {% set title_html = '<div class="text-[13px] font-medium text-gray-900 truncate">' + img.title + '</div>' %}
+    {% set subtitle_html = '<div class="text-[12px] text-gray-500 truncate">' + img.domain + '</div>' %}
+
+    {{ list_card(img_icon, title_html, subtitle_html=subtitle_html, link_url=img.url, is_compact=True, icon_box_class=img_icon_box) }}
+    {% endfor %}
     {% endif %}

     {# Crawled Pages #}
@@ -272,23 +311,46 @@
        const fragment = document.createDocumentFragment();
        let lastIndex = 0;
        const text = textNode.nodeValue;
-        const regex = /`?(search|page):(\d+)`?/gi;
+        // Regex to capture:
+        // 1. Optional brackets/parens: [(
+        // 2. Type: search/page
+        // 3. IDs: 1 or 1,2,3
+        // 4. Closing: )]
+        const regex = /[\[\(]?(search|page):\s*([\d,\s]+)[\]\)]?/gi;
        let match;

        while ((match = regex.exec(text)) !== null) {
+            // Validate match: simple check to ensure it contains digits
+            if (!/\d/.test(match[2])) continue;
+
            fragment.appendChild(document.createTextNode(text.substring(lastIndex, match.index)));

+            const fullMatch = match[0];
            const type = match[1].toLowerCase();
-            const id = match[2];
+            const idString = match[2];

-            const span = document.createElement("span");
-            const isPage = type === "page";
-            const colorClass = isPage
-                ? "text-orange-600 bg-orange-50 border-orange-200"
-                : "text-blue-600 bg-blue-50 border-blue-200";
+            // Parse IDs (split by comma or space)
+            const ids = idString.split(/[,\s]+/).filter(s => s.trim().length > 0);

-            span.innerHTML = `<span class="inline-flex items-center justify-center min-w-[14px] h-4 px-0.5 text-[9px] font-bold ${colorClass} border rounded align-top -top-0.5 relative mx-0.5 cursor-default" title="${type}:${id}">${id}</span>`;
-            fragment.appendChild(span.firstElementChild);
+            // Check for standard format (allow plain or [brackets])
+            // Standard: search:1, [search:1], page:1, [page:1]
+            // Non-standard: (page:1), page:1,2, (page:1,2)
+            const isStandard = /^[\[]?(search|page):\d+[\]]?$/i.test(fullMatch);
+
+            if (!isStandard) {
+                console.warn(`[Template] Detected non-standard citation format: "${fullMatch}". Rendered as: ${type}:${ids.join(',')}`);
+            }
+
+            ids.forEach(id => {
+                const span = document.createElement("span");
+                const isPage = type === "page";
+                const colorClass = isPage
+                    ? "text-orange-600 bg-orange-50 border-orange-200"
+                    : "text-blue-600 bg-blue-50 border-blue-200";
+
+                span.innerHTML = `<span class="inline-flex items-center justify-center min-w-[14px] h-4 px-0.5 text-[9px] font-bold ${colorClass} border rounded align-top -top-0.5 relative mx-0.5 cursor-default" title="${type}:${id}">${id}</span>`;
+                fragment.appendChild(span.firstElementChild);
+            });

            lastIndex = regex.lastIndex;
        }
@@ -301,6 +363,37 @@
        }

        processCitations(contentDiv);
+
+        // Handle broken images in markdown content
+        const contentImages = contentDiv.querySelectorAll('img');
+        contentImages.forEach(img => {
+            // Apply sizing class based on aspect ratio
+            const applySizeClass = function() {
+                if (this.naturalWidth >= this.naturalHeight) {
+                    this.classList.add('img-horizontal');
+                } else {
+                    this.classList.add('img-vertical');
+                }
+            };
+
+            img.onerror = function() {
+                const fallback = document.createElement('span');
+                fallback.className = 'img-error-fallback';
+                fallback.innerHTML = `<span style="font-size: 18px;">(。•́︿•̀。)</span><span>渲染失败</span>`;
+                this.parentNode.replaceChild(fallback, this);
+            };
+
+            // Check if image already loaded
+            if (img.complete) {
+                if (img.naturalHeight === 0) {
+                    img.onerror();
+                } else {
+                    applySizeClass.call(img);
+                }
+            } else {
+                img.onload = applySizeClass;
+            }
+        });
    });
    </script>
</body>
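
The citation renderer above is template-side JavaScript. As a rough, standalone illustration of what the loosened pattern now accepts, here is a Python sketch (the function and names are ours, not the plugin's) that splits comma-separated ID lists and flags the same non-standard forms the console.warn branch reports:

import re

# Sketch only: mirrors the template's loosened citation pattern in Python.
CITATION = re.compile(r'[\[\(]?(search|page):\s*([\d,\s]+)[\]\)]?', re.IGNORECASE)
STANDARD = re.compile(r'^\[?(search|page):\d+\]?$', re.IGNORECASE)

def split_citations(text: str):
    """Yield (type, id, is_standard) for every citation-like token in text."""
    for m in CITATION.finditer(text):
        if not re.search(r'\d', m.group(2)):
            continue  # same guard as the JS: ignore matches with no digits
        ids = [s for s in re.split(r'[,\s]+', m.group(2)) if s]
        standard = bool(STANDARD.match(m.group(0)))
        for cid in ids:
            yield m.group(1).lower(), cid, standard

print(list(split_citations("see [search:1] and (page:2, 3)")))
# [('search', '1', True), ('page', '2', False), ('page', '3', False)]

A single loose tag such as (page:2, 3) therefore still renders as two separate badges, which is what the per-ID forEach in the template achieves.
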
@@ -35,3 +35,4 @@ class HYWConfig:
     vision_output_price: Optional[float] = None
     intruct_input_price: Optional[float] = None
     intruct_output_price: Optional[float] = None
+
@@ -39,6 +39,10 @@ class ProcessingPipeline:
         self.client = AsyncOpenAI(base_url=self.config.base_url, api_key=self.config.api_key)
         self.all_web_results = []  # Cache for search results
         self.current_mode = "standard"  # standard | agent
+        # Independent ID counters for each type
+        self.search_id_counter = 0
+        self.page_id_counter = 0
+        self.image_id_counter = 0

         self.web_search_tool = {
             "type": "function",
@@ -118,8 +122,11 @@ class ProcessingPipeline:
         final_response_content = ""
         structured: Dict[str, Any] = {}

-        # Reset search cache for this execution
+        # Reset search cache and ID counters for this execution
         self.all_web_results = []
+        self.search_id_counter = 0
+        self.page_id_counter = 0
+        self.image_id_counter = 0

         try:
             logger.info(f"Pipeline: Starting workflow for '{user_input}' using {active_model}")
@@ -244,8 +251,8 @@ class ProcessingPipeline:
         search_msgs_text = self._format_search_msgs()
         image_msgs_text = self._format_image_search_msgs()

-        has_search_results = any(not r.get("is_image") for r in self.all_web_results)
-        has_image_results = any(r.get("is_image") for r in self.all_web_results)
+        has_search_results = any(r.get("_type") == "search" for r in self.all_web_results)
+        has_image_results = any(r.get("_type") == "image" for r in self.all_web_results)

         # Build agent system prompt
         agent_prompt_tpl = getattr(self.config, "agent_system_prompt", None) or AGENT_SP
@@ -462,7 +469,7 @@
         for tc in crawl_calls:
             url = tc.get("arguments", {}).get("url", "")
             # Try to find cached result
-            found = next((r for r in self.all_web_results if r.get("url") == url and r.get("is_crawled")), None)
+            found = next((r for r in self.all_web_results if r.get("url") == url and r.get("_type") == "page"), None)
             if found:
                 try:
                     from urllib.parse import urlparse
@@ -588,6 +595,19 @@
             last_agent["time"] = a.get("time", 0)
             last_agent["cost"] = a.get("cost", 0.0)

+        # Clean up conversation history: remove tool calls and results to save tokens and avoid ID conflicts
+        # Keep only 'user' messages and 'assistant' messages without tool_calls (final answers)
+        cleaned_history = []
+        for msg in current_history:
+            if msg.get("role") == "tool":
+                continue
+            if msg.get("role") == "assistant" and msg.get("tool_calls"):
+                continue
+            cleaned_history.append(msg)
+
+        # Update the reference (since it might be used by the caller)
+        current_history[:] = cleaned_history
+
         return {
             "llm_response": final_content,
             "structured_response": structured,
@@ -609,8 +629,8 @@
         }

     def _parse_tagged_response(self, text: str) -> Dict[str, Any]:
-        """Parse response for references and page references."""
-        parsed = {"response": "", "references": [], "page_references": [], "flow_steps": []}
+        """Parse response for references and page references reordered by appearance."""
+        parsed = {"response": "", "references": [], "page_references": [], "image_references": [], "flow_steps": []}
         if not text:
             return parsed

@@ -620,7 +640,6 @@

         # 1. Try to unwrap JSON if the model acted like a ReAct agent
         try:
-            # Check if it looks like JSON first to avoid performance hit
             if remaining_text.strip().startswith("{") and "action" in remaining_text:
                 data = json.loads(remaining_text)
                 if isinstance(data, dict) and "action_input" in data:
@@ -628,86 +647,104 @@
         except Exception:
             pass

-        id_map = {}  # Map original search ID (str) -> new index (int)
-        page_id_map = {}  # Map original page ID (str) -> new index (int)
+        # 2. Extract references from text first (Order by appearance)
+        # Pattern matches [search:123], [page:123], [image:123]
+        pattern = re.compile(r'\[(search|page|image):(\d+)\]', re.IGNORECASE)
+
+        matches = list(pattern.finditer(remaining_text))
+
+        search_map = {}  # old_id_str -> new_id (int)
+        page_map = {}
+        image_map = {}
+
+        def process_ref(tag_type, old_id):
+            # Find in all_web_results
+            result_item = next((r for r in self.all_web_results if r.get("_id") == old_id and r.get("_type") == tag_type), None)
+
+            if not result_item:
+                return
+
+            entry = {
+                "title": result_item.get("title", ""),
+                "url": result_item.get("url", ""),
+                "domain": result_item.get("domain", "")
+            }
+            if tag_type == "image":
+                entry["thumbnail"] = result_item.get("thumbnail", "")
+
+            # Add to respective list and map
+            # Check maps to avoid duplicates
+            if tag_type == "search":
+                if str(old_id) not in search_map:
+                    parsed["references"].append(entry)
+                    search_map[str(old_id)] = len(parsed["references"])
+            elif tag_type == "page":
+                if str(old_id) not in page_map:
+                    parsed["page_references"].append(entry)
+                    page_map[str(old_id)] = len(parsed["page_references"])
+            elif tag_type == "image":
+                if str(old_id) not in image_map:
+                    parsed["image_references"].append(entry)
+                    image_map[str(old_id)] = len(parsed["image_references"])
+
+        # Pass 1: Text Body
+        for m in matches:
+            try:
+                process_ref(m.group(1).lower(), int(m.group(2)))
+            except ValueError:
+                continue

-        # Parse References Block (unified: contains both [search] and [page] entries)
+        # 3. Pass 2: References Block (Capture items missed in text)
         ref_block_match = re.search(r'```references\s*(.*?)\s*```', remaining_text, re.DOTALL | re.IGNORECASE)
         if ref_block_match:
             ref_content = ref_block_match.group(1).strip()
+            remaining_text = remaining_text.replace(ref_block_match.group(0), "").strip()
+
             for line in ref_content.split("\n"):
                 line = line.strip()
                 if not line: continue
+                # Match [id] [type]
+                # e.g. [1] [image] ... or [image:1] ...

-                # Match [id] [type] [title](url)
-                # e.g. [1] [search] [text description](url) or [5] [page] [page title](url)
-                id_match = re.match(r"^\[(\d+)\]", line)
-                type_match = re.search(r"\[(search|page)\]", line, re.IGNORECASE)
-                link_match = re.search(r"\[([^\[\]]+)\]\(([^)]+)\)", line)
-
-                idx = None
+                # Check for [id] [type] format
+                id_match = re.match(r"^\[(\d+)\]\s*\[(search|page|image)\]", line, re.IGNORECASE)
                 if id_match:
                     try:
-                        idx = int(id_match.group(1))
+                        process_ref(id_match.group(2).lower(), int(id_match.group(1)))
                     except ValueError:
                         pass
-
-                ref_type = "search"  # default
-                if type_match:
-                    ref_type = type_match.group(1).lower()
-
-                entry = None
-                if idx is not None and self.all_web_results:
-                    # For page type, only match crawled items
-                    if ref_type == "page":
-                        found = next((r for r in self.all_web_results if r.get("_id") == idx and r.get("is_crawled")), None)
-                    else:
-                        found = next((r for r in self.all_web_results if r.get("_id") == idx and not r.get("is_crawled")), None)
-
-                    if found:
-                        entry = {
-                            "title": found.get("title"),
-                            "url": found.get("url"),
-                            "domain": found.get("domain", "")
-                        }
-
-                if not entry and link_match:
-                    entry = {"title": link_match.group(1), "url": link_match.group(2)}
-
-                if entry:
-                    if ref_type == "page":
-                        parsed["page_references"].append(entry)
-                        if idx is not None:
-                            page_id_map[str(idx)] = len(parsed["page_references"])
-                    else:
-                        parsed["references"].append(entry)
-                        if idx is not None:
-                            id_map[str(idx)] = len(parsed["references"])
-
-            remaining_text = remaining_text.replace(ref_block_match.group(0), "").strip()
+                else:
+                    # Check for [type:id] format in list
+                    alt_match = re.match(r"^\[(search|page|image):(\d+)\]", line, re.IGNORECASE)
+                    if alt_match:
+                        try:
+                            process_ref(alt_match.group(1).lower(), int(alt_match.group(2)))
+                        except ValueError:
+                            pass
+
+        # 4. Replace tags in text with new sequential IDs
+
+        # 4. Replace tags in text with new sequential IDs
+        def replace_tag(match):
+            tag_type = match.group(1).lower()
+            old_id = match.group(2)
+
+            new_id = None
+            if tag_type == "search":
+                new_id = search_map.get(old_id)
+            elif tag_type == "page":
+                new_id = page_map.get(old_id)
+            elif tag_type == "image":
+                new_id = image_map.get(old_id)
+
+            if new_id is not None:
+                if tag_type == "image":
+                    return ""
+                return f"[{tag_type}:{new_id}]"
+
+            return match.group(0)

-        # Replace search:id citations
-        if id_map:
-            def replace_search_citation(match):
-                old_id = match.group(1) or match.group(2)
-                if old_id in id_map:
-                    return f"`search:{id_map[old_id]}`"
-                return match.group(0)
-
-            remaining_text = re.sub(r'\[(\d+)\]', replace_search_citation, remaining_text)
-            remaining_text = re.sub(r'(?<!`)search:(\d+)(?!`)', replace_search_citation, remaining_text)
-            remaining_text = re.sub(r'`search:(\d+)`', replace_search_citation, remaining_text)
-
-        # Replace page:id citations
-        if page_id_map:
-            def replace_page_citation(match):
-                old_id = match.group(1)
-                if old_id in page_id_map:
-                    return f"`page:{page_id_map[old_id]}`"
-                return match.group(0)
-
-            remaining_text = re.sub(r'(?<!`)page:(\d+)(?!`)', replace_page_citation, remaining_text)
-            remaining_text = re.sub(r'`page:(\d+)`', replace_page_citation, remaining_text)
+        remaining_text = pattern.sub(replace_tag, remaining_text)

         parsed["response"] = remaining_text.strip()
         return parsed
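
The rewritten _parse_tagged_response boils down to: collect [type:id] tags in order of first appearance, renumber each type from 1, and strip image tags from the prose once recorded. A simplified, self-contained sketch of that scheme (the cache items and the renumber function are illustrative, not the plugin's API):

import re

TAG = re.compile(r'\[(search|page|image):(\d+)\]', re.IGNORECASE)

def renumber(text, cache):
    """Renumber citations per type by order of appearance (sketch only)."""
    refs = {"search": [], "page": [], "image": []}
    maps = {"search": {}, "page": {}, "image": {}}

    for m in TAG.finditer(text):
        kind, old_id = m.group(1).lower(), int(m.group(2))
        item = next((r for r in cache if r.get("_id") == old_id and r.get("_type") == kind), None)
        if item and old_id not in maps[kind]:
            refs[kind].append(item)
            maps[kind][old_id] = len(refs[kind])  # new IDs are 1-based

    def sub(m):
        kind, old_id = m.group(1).lower(), int(m.group(2))
        new_id = maps[kind].get(old_id)
        if new_id is None:
            return m.group(0)  # unknown citation: leave it untouched
        return "" if kind == "image" else f"[{kind}:{new_id}]"

    return TAG.sub(sub, text), refs

cache = [{"_id": 7, "_type": "search", "title": "A"}, {"_id": 2, "_type": "page", "title": "B"}]
new_text, refs = renumber("Per [search:7] and [page:2]; also [search:7].", cache)
# new_text == "Per [search:1] and [page:1]; also [search:1]."

Repeated citations of the same source keep one entry and one new ID, so the rendered badges stay consistent with the reference lists the template iterates over.
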
@@ -730,12 +767,11 @@ class ProcessingPipeline:
         query = args.get("query")
         web = await self.search_service.search(query)

-        # Cache results and assign IDs
-        current_max_id = max([item.get("_id", 0) for item in self.all_web_results], default=0)
-
+        # Cache results and assign search-specific IDs
         for item in web:
-            current_max_id += 1
-            item["_id"] = current_max_id
+            self.search_id_counter += 1
+            item["_id"] = self.search_id_counter
+            item["_type"] = "search"
             item["query"] = query
             self.all_web_results.append(item)

@@ -745,10 +781,11 @@
         query = args.get("query")
         images = await self.search_service.image_search(query)

-        current_max_id = max([item.get("_id", 0) for item in self.all_web_results], default=0)
+        # Cache results and assign image-specific IDs
         for item in images:
-            current_max_id += 1
-            item["_id"] = current_max_id
+            self.image_id_counter += 1
+            item["_id"] = self.image_id_counter
+            item["_type"] = "image"
             item["query"] = query
             item["is_image"] = True
             self.all_web_results.append(item)
@@ -761,15 +798,15 @@
         # Returns Dict: {content, title, url}
         result_dict = await self.search_service.fetch_page(url)

-        # Cache the crawled content so Agent can access it
-        current_max_id = max([item.get("_id", 0) for item in self.all_web_results], default=0)
-        current_max_id += 1
+        # Cache the crawled content with page-specific ID
+        self.page_id_counter += 1

         cached_item = {
-            "_id": current_max_id,
+            "_id": self.page_id_counter,
+            "_type": "page",
             "title": result_dict.get("title", "Page"),
             "url": result_dict.get("url", url),
-            "content": result_dict.get("content", "")[:2000],  # Clip content for prompt
+            "content": result_dict.get("content", ""),
             "domain": "",
             "is_crawled": True,
         }
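
The three tool handlers above now draw IDs from independent counters and tag every cached item with a _type, so search, page, and image references no longer share one ID sequence. A compact sketch of that bookkeeping, using an invented ResultCache helper rather than the pipeline's own attributes:

from itertools import count

class ResultCache:
    """Sketch of per-type ID assignment; not the plugin's actual class."""
    def __init__(self):
        self.items = []
        self._counters = {"search": count(1), "page": count(1), "image": count(1)}

    def add(self, kind: str, item: dict) -> dict:
        item["_id"] = next(self._counters[kind])  # IDs restart at 1 per type
        item["_type"] = kind
        self.items.append(item)
        return item

cache = ResultCache()
cache.add("search", {"title": "hit 1"})
cache.add("image", {"title": "pic 1"})
cache.add("search", {"title": "hit 2"})
# search items get _id 1 and 2; the image item gets _id 1 in its own sequence

Filtering by _type (rather than the old is_image / is_crawled flags) is what the _format_*_msgs helpers below rely on.
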
@@ -940,18 +977,13 @@ class ProcessingPipeline:
         if not self.all_web_results:
             return ""

-        def clip(s: str, n: int) -> str:
-            s = (s or "").strip()
-            return s if len(s) <= n else s[: n - 1] + "…"
-
         lines = []
         for res in self.all_web_results:
-            if res.get("is_image"): continue  # Skip images
-            if res.get("is_crawled"): continue  # Skip crawled pages (handled separately)
+            if res.get("_type") != "search": continue  # Only search results
             idx = res.get("_id")
-            title = clip(res.get("title", ""), 80)
+            title = (res.get("title", "") or "").strip()
             url = res.get("url", "")
-            content = clip(res.get("content", ""), 200)
+            content = (res.get("content", "") or "").strip()
             lines.append(f"[{idx}] Title: {title}\nURL: {url}\nSnippet: {content}\n")

         return "\n".join(lines)
@@ -961,17 +993,13 @@
         if not self.all_web_results:
             return ""

-        def clip(s: str, n: int) -> str:
-            s = (s or "").strip()
-            return s if len(s) <= n else s[: n - 1] + "…"
-
         lines = []
         for res in self.all_web_results:
-            if not res.get("is_crawled"): continue  # Only crawled pages
+            if res.get("_type") != "page": continue  # Only page results
             idx = res.get("_id")
-            title = clip(res.get("title", ""), 80)
+            title = (res.get("title", "") or "").strip()
             url = res.get("url", "")
-            content = clip(res.get("content", ""), 1500)  # More content for pages
+            content = (res.get("content", "") or "").strip()
             lines.append(f"[{idx}] Title: {title}\nURL: {url}\nContent: {content}\n")

         return "\n".join(lines)
@@ -982,7 +1010,7 @@

         lines = []
         for res in self.all_web_results:
-            if not res.get("is_image"): continue
+            if res.get("_type") != "image": continue  # Only image results
             idx = res.get("_id")
             title = res.get("title", "")
             url = res.get("image", "") or res.get("url", "")