entari-plugin-hyw 4.0.0rc5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of entari-plugin-hyw might be problematic. Click here for more details.

Files changed (99) hide show
  1. entari_plugin_hyw/__init__.py +532 -0
  2. entari_plugin_hyw/assets/card-dist/index.html +387 -0
  3. entari_plugin_hyw/assets/card-dist/logos/anthropic.svg +1 -0
  4. entari_plugin_hyw/assets/card-dist/logos/cerebras.svg +9 -0
  5. entari_plugin_hyw/assets/card-dist/logos/deepseek.png +0 -0
  6. entari_plugin_hyw/assets/card-dist/logos/gemini.svg +1 -0
  7. entari_plugin_hyw/assets/card-dist/logos/google.svg +1 -0
  8. entari_plugin_hyw/assets/card-dist/logos/grok.png +0 -0
  9. entari_plugin_hyw/assets/card-dist/logos/huggingface.png +0 -0
  10. entari_plugin_hyw/assets/card-dist/logos/microsoft.svg +15 -0
  11. entari_plugin_hyw/assets/card-dist/logos/minimax.png +0 -0
  12. entari_plugin_hyw/assets/card-dist/logos/mistral.png +0 -0
  13. entari_plugin_hyw/assets/card-dist/logos/nvida.png +0 -0
  14. entari_plugin_hyw/assets/card-dist/logos/openai.svg +1 -0
  15. entari_plugin_hyw/assets/card-dist/logos/openrouter.png +0 -0
  16. entari_plugin_hyw/assets/card-dist/logos/perplexity.svg +24 -0
  17. entari_plugin_hyw/assets/card-dist/logos/qwen.png +0 -0
  18. entari_plugin_hyw/assets/card-dist/logos/xai.png +0 -0
  19. entari_plugin_hyw/assets/card-dist/logos/xiaomi.png +0 -0
  20. entari_plugin_hyw/assets/card-dist/logos/zai.png +0 -0
  21. entari_plugin_hyw/assets/card-dist/vite.svg +1 -0
  22. entari_plugin_hyw/assets/icon/anthropic.svg +1 -0
  23. entari_plugin_hyw/assets/icon/cerebras.svg +9 -0
  24. entari_plugin_hyw/assets/icon/deepseek.png +0 -0
  25. entari_plugin_hyw/assets/icon/gemini.svg +1 -0
  26. entari_plugin_hyw/assets/icon/google.svg +1 -0
  27. entari_plugin_hyw/assets/icon/grok.png +0 -0
  28. entari_plugin_hyw/assets/icon/huggingface.png +0 -0
  29. entari_plugin_hyw/assets/icon/microsoft.svg +15 -0
  30. entari_plugin_hyw/assets/icon/minimax.png +0 -0
  31. entari_plugin_hyw/assets/icon/mistral.png +0 -0
  32. entari_plugin_hyw/assets/icon/nvida.png +0 -0
  33. entari_plugin_hyw/assets/icon/openai.svg +1 -0
  34. entari_plugin_hyw/assets/icon/openrouter.png +0 -0
  35. entari_plugin_hyw/assets/icon/perplexity.svg +24 -0
  36. entari_plugin_hyw/assets/icon/qwen.png +0 -0
  37. entari_plugin_hyw/assets/icon/xai.png +0 -0
  38. entari_plugin_hyw/assets/icon/xiaomi.png +0 -0
  39. entari_plugin_hyw/assets/icon/zai.png +0 -0
  40. entari_plugin_hyw/browser/__init__.py +10 -0
  41. entari_plugin_hyw/browser/engines/base.py +13 -0
  42. entari_plugin_hyw/browser/engines/bing.py +95 -0
  43. entari_plugin_hyw/browser/engines/searxng.py +137 -0
  44. entari_plugin_hyw/browser/landing.html +172 -0
  45. entari_plugin_hyw/browser/manager.py +153 -0
  46. entari_plugin_hyw/browser/service.py +275 -0
  47. entari_plugin_hyw/card-ui/.gitignore +24 -0
  48. entari_plugin_hyw/card-ui/README.md +5 -0
  49. entari_plugin_hyw/card-ui/index.html +16 -0
  50. entari_plugin_hyw/card-ui/package-lock.json +2342 -0
  51. entari_plugin_hyw/card-ui/package.json +31 -0
  52. entari_plugin_hyw/card-ui/public/logos/anthropic.svg +1 -0
  53. entari_plugin_hyw/card-ui/public/logos/cerebras.svg +9 -0
  54. entari_plugin_hyw/card-ui/public/logos/deepseek.png +0 -0
  55. entari_plugin_hyw/card-ui/public/logos/gemini.svg +1 -0
  56. entari_plugin_hyw/card-ui/public/logos/google.svg +1 -0
  57. entari_plugin_hyw/card-ui/public/logos/grok.png +0 -0
  58. entari_plugin_hyw/card-ui/public/logos/huggingface.png +0 -0
  59. entari_plugin_hyw/card-ui/public/logos/microsoft.svg +15 -0
  60. entari_plugin_hyw/card-ui/public/logos/minimax.png +0 -0
  61. entari_plugin_hyw/card-ui/public/logos/mistral.png +0 -0
  62. entari_plugin_hyw/card-ui/public/logos/nvida.png +0 -0
  63. entari_plugin_hyw/card-ui/public/logos/openai.svg +1 -0
  64. entari_plugin_hyw/card-ui/public/logos/openrouter.png +0 -0
  65. entari_plugin_hyw/card-ui/public/logos/perplexity.svg +24 -0
  66. entari_plugin_hyw/card-ui/public/logos/qwen.png +0 -0
  67. entari_plugin_hyw/card-ui/public/logos/xai.png +0 -0
  68. entari_plugin_hyw/card-ui/public/logos/xiaomi.png +0 -0
  69. entari_plugin_hyw/card-ui/public/logos/zai.png +0 -0
  70. entari_plugin_hyw/card-ui/public/vite.svg +1 -0
  71. entari_plugin_hyw/card-ui/src/App.vue +756 -0
  72. entari_plugin_hyw/card-ui/src/assets/vue.svg +1 -0
  73. entari_plugin_hyw/card-ui/src/components/HelloWorld.vue +41 -0
  74. entari_plugin_hyw/card-ui/src/components/MarkdownContent.vue +382 -0
  75. entari_plugin_hyw/card-ui/src/components/SectionCard.vue +41 -0
  76. entari_plugin_hyw/card-ui/src/components/StageCard.vue +240 -0
  77. entari_plugin_hyw/card-ui/src/main.ts +5 -0
  78. entari_plugin_hyw/card-ui/src/style.css +29 -0
  79. entari_plugin_hyw/card-ui/src/test_regex.js +103 -0
  80. entari_plugin_hyw/card-ui/src/types.ts +61 -0
  81. entari_plugin_hyw/card-ui/tsconfig.app.json +16 -0
  82. entari_plugin_hyw/card-ui/tsconfig.json +7 -0
  83. entari_plugin_hyw/card-ui/tsconfig.node.json +26 -0
  84. entari_plugin_hyw/card-ui/vite.config.ts +16 -0
  85. entari_plugin_hyw/definitions.py +130 -0
  86. entari_plugin_hyw/history.py +248 -0
  87. entari_plugin_hyw/image_cache.py +274 -0
  88. entari_plugin_hyw/misc.py +135 -0
  89. entari_plugin_hyw/modular_pipeline.py +351 -0
  90. entari_plugin_hyw/render_vue.py +401 -0
  91. entari_plugin_hyw/search.py +116 -0
  92. entari_plugin_hyw/stage_base.py +88 -0
  93. entari_plugin_hyw/stage_instruct.py +328 -0
  94. entari_plugin_hyw/stage_instruct_review.py +92 -0
  95. entari_plugin_hyw/stage_summary.py +164 -0
  96. entari_plugin_hyw-4.0.0rc5.dist-info/METADATA +116 -0
  97. entari_plugin_hyw-4.0.0rc5.dist-info/RECORD +99 -0
  98. entari_plugin_hyw-4.0.0rc5.dist-info/WHEEL +5 -0
  99. entari_plugin_hyw-4.0.0rc5.dist-info/top_level.txt +1 -0
@@ -0,0 +1,351 @@
1
+ """
2
+ Modular Pipeline Dispatcher
3
+
4
+ New pipeline architecture: Instruct Loop (x2) -> Summary.
5
+ Simpler flow with self-correction/feedback loop.
6
+ """
7
+
8
+ import asyncio
9
+ import time
10
+ from typing import Any, Dict, List, Optional
11
+
12
+ from loguru import logger
13
+ from openai import AsyncOpenAI
14
+
15
+ from .stage_base import StageContext
16
+ from .stage_instruct import InstructStage
17
+ from .stage_instruct_review import InstructReviewStage
18
+ from .stage_summary import SummaryStage
19
+ from .search import SearchService
20
+
21
+
22
class ModularPipeline:
    """
    Modular Pipeline dispatcher.

    Flow:
        1. Instruct (Round 1): initial discovery (search / tool calls).
        2. Instruct Review (Round 2): review & refine the findings.
        3. Summary: generate the final response for the user.

    A refusal raised by the first Instruct round short-circuits the
    remaining stages.
    """

    def __init__(self, config: Any):
        """Build the pipeline; `config` supplies model names, prices and API credentials."""
        self.config = config
        self.search_service = SearchService(config)
        self.client = AsyncOpenAI(base_url=config.base_url, api_key=config.api_key)

        # All three stages share one search service and one OpenAI-compatible client.
        self.instruct_stage = InstructStage(config, self.search_service, self.client)
        self.instruct_review_stage = InstructReviewStage(config, self.search_service, self.client)
        self.summary_stage = SummaryStage(config, self.search_service, self.client)

    async def execute(
        self,
        user_input: str,
        conversation_history: List[Dict],
        model_name: Optional[str] = None,
        images: Optional[List[str]] = None,
        vision_model_name: Optional[str] = None,      # unused; kept for caller compatibility
        selected_vision_model: Optional[str] = None,  # unused; kept for caller compatibility
    ) -> Dict[str, Any]:
        """Run the full Instruct -> Review -> Summary pipeline.

        Appends the user/assistant turn to `conversation_history` (mutated in
        place) and returns a result dict containing the summary text,
        structured citations, stats, token usage and per-stage UI metadata.
        On an unexpected error a reduced dict with an "error" key is returned
        instead of raising.
        """
        start_time = time.time()
        stats: Dict[str, Any] = {"start_time": start_time}
        usage_totals = {"input_tokens": 0, "output_tokens": 0}
        active_model = model_name or self.config.model_name

        context = StageContext(
            user_input=user_input,
            images=images or [],
            conversation_history=conversation_history,
        )

        trace: Dict[str, Any] = {
            "instruct_rounds": [],
            "summary": None,
        }

        try:
            logger.info(f"Pipeline: Processing '{user_input[:30]}...'")

            # === Stage 1: Instruct (Initial Discovery) ===
            logger.info("Pipeline: Stage 1 - Instruct")
            instruct_result = await self.instruct_stage.execute(context)

            instruct_result.trace["stage_name"] = "Instruct (Round 1)"
            trace["instruct_rounds"].append(instruct_result.trace)
            self._add_usage(usage_totals, instruct_result.usage)

            # The first round may decide the request should be refused.
            if context.should_refuse:
                return self._build_refusal_response(context, conversation_history, active_model, stats)

            # === Stage 2: Instruct Review (Refine) ===
            logger.info("Pipeline: Stage 2 - Instruct Review")
            review_result = await self.instruct_review_stage.execute(context)

            review_result.trace["stage_name"] = "Instruct Review (Round 2)"
            trace["instruct_rounds"].append(review_result.trace)
            self._add_usage(usage_totals, review_result.usage)

            # === Stage 3: Summary ===
            # In image mode, feed the page screenshots (already rendered by
            # InstructStage) to the summary model alongside the user's images.
            all_images = list(images) if images else []
            if getattr(self.config, "page_content_mode", "text") == "image":
                for r in context.web_results:
                    if r.get("_type") == "page" and r.get("screenshot_b64"):
                        all_images.append(r["screenshot_b64"])

            summary_result = await self.summary_stage.execute(
                context,
                images=all_images if all_images else None,
            )
            trace["summary"] = summary_result.trace
            self._add_usage(usage_totals, summary_result.usage)

            summary_content = summary_result.data.get("content", "")

            # === Result Assembly ===
            stats["total_time"] = time.time() - start_time
            structured = self._parse_response(summary_content, context)

            # Prefetch cited images so the UI renders base64 data, not remote URLs.
            await self._cache_reference_images(structured)

            stages_used = self._build_stages_ui(trace, context, images)

            conversation_history.append({"role": "user", "content": user_input})
            conversation_history.append({"role": "assistant", "content": summary_content})

            return {
                "llm_response": summary_content,
                "structured_response": structured,
                "stats": stats,
                "model_used": active_model,
                "conversation_history": conversation_history,
                "trace_markdown": self._render_trace_markdown(trace),
                "billing_info": {
                    "input_tokens": usage_totals["input_tokens"],
                    "output_tokens": usage_totals["output_tokens"],
                    "total_cost": 0.0,
                },
                "stages_used": stages_used,
                "web_results": context.web_results,
            }

        except Exception as e:
            # logger.exception records the message together with the traceback,
            # replacing the manual traceback.format_exc() round-trip.
            logger.exception(f"Pipeline: Critical Error - {e}")
            return {
                "llm_response": f"Error: {e}",
                "stats": stats,
                "error": str(e),
            }

    @staticmethod
    def _add_usage(totals: Dict[str, int], usage: Dict[str, Any]) -> None:
        """Accumulate a stage's token usage into the running totals (in place)."""
        totals["input_tokens"] += usage.get("input_tokens", 0)
        totals["output_tokens"] += usage.get("output_tokens", 0)

    @staticmethod
    def _snippet(text: Optional[str], limit: int) -> str:
        """Truncate `text` to `limit` chars, adding an ellipsis only when truncated.

        Fix: the original unconditionally appended "..." — even to empty or
        short content — producing stray "..." snippets in the UI.
        """
        text = text or ""
        return text[:limit] + "..." if len(text) > limit else text

    def _stage_cost(self, usage: Dict[str, Any], model_key: str) -> float:
        """Compute USD cost for a stage from per-million-token prices in config."""
        cfg = self.config.get_model_config(model_key)
        input_price = cfg.get("input_price") or 0
        output_price = cfg.get("output_price") or 0
        return (
            usage.get("input_tokens", 0) * input_price
            + usage.get("output_tokens", 0) * output_price
        ) / 1_000_000

    async def _cache_reference_images(self, structured: Dict[str, Any]) -> None:
        """Prefetch remote reference images and swap URLs for base64 data URIs.

        Best-effort: any failure is logged and the structured response is
        left usable. Images that fail to cache are dropped so the UI never
        renders a broken link.
        """
        try:
            from .image_cache import get_image_cache
            cache = get_image_cache()

            # 1. Collect all remote image URLs from the structured response.
            all_image_urls: List[str] = []
            for ref in structured.get("references", []):
                if ref.get("images"):
                    all_image_urls.extend(
                        img for img in ref["images"] if img and img.startswith("http")
                    )
            if not all_image_urls:
                return

            # 2. Prefetch and wait — we are about to render.
            cached_map = await cache.get_all_cached(all_image_urls)

            # 3. Keep only successfully cached (data:) images.
            for ref in structured.get("references", []):
                if not ref.get("images"):
                    continue
                ref["images"] = [
                    cached
                    for cached in (cached_map.get(img) for img in ref["images"])
                    if cached and cached.startswith("data:")
                ]
        except Exception as e:
            logger.warning(f"Pipeline: Image caching failed: {e}")

    def _build_refusal_response(self, context, history, model, stats) -> Dict[str, Any]:
        """Minimal result dict used when the Instruct stage refuses the request."""
        return {
            "llm_response": "Refused",
            "structured_response": {},
            "stats": stats,
            "model_used": model,
            "conversation_history": history,
            "refuse_answer": True,
            "refuse_reason": context.refuse_reason,
        }

    def _parse_response(self, text: str, context: StageContext) -> Dict[str, Any]:
        """Parse the summary text and extract [N] citations, fetched pages first.

        Returns a dict with the cleaned "response" (citations re-numbered)
        and a "references" list ordered so that fully fetched pages precede
        plain search hits.
        """
        import re

        parsed: Dict[str, Any] = {
            "response": "",
            "references": [],
            "page_references": [],
            "image_references": [],
        }
        if not text:
            return parsed

        # Strip any trailing "References"/"Sources" section the model emitted;
        # we rebuild references ourselves from context.web_results.
        ref_pattern = re.compile(
            r'(?:\n\s*|^)\s*(?:#{1,3}|\*\*)\s*(?:References|Citations|Sources|参考资料)[\s\S]*$',
            re.IGNORECASE | re.MULTILINE,
        )
        body_text = ref_pattern.sub('', text)

        # 1. Collect cited numeric IDs from [N] markers, first-seen order.
        #    int() cannot fail here — the group is guaranteed to be digits —
        #    so the original bare `except: pass` was dead code.
        cited_ids: List[int] = []
        for m in re.finditer(r'\[(\d+)\]', body_text):
            cid = int(m.group(1))
            if cid not in cited_ids:
                cited_ids.append(cid)

        # 2. Resolve each ID against web_results and flag fetched pages.
        cited_items: List[Dict[str, Any]] = []
        for cid in cited_ids:
            item = next((r for r in context.web_results if r.get("_id") == cid), None)
            if not item:
                continue
            # A URL counts as fetched when it also appears as a "page" result.
            is_fetched = any(
                r.get("_type") == "page" and r.get("url") == item.get("url")
                for r in context.web_results
            )
            cited_items.append({
                "original_id": cid,
                "item": item,
                "is_fetched": is_fetched,
            })

        # 3. Stable sort: fetched pages first, then plain search results.
        cited_items.sort(key=lambda x: x["is_fetched"], reverse=True)

        # 4. Build the old-ID -> new-ID map and the references list in sorted order.
        reindex_map: Dict[int, int] = {}
        for i, entry in enumerate(cited_items):
            reindex_map[entry["original_id"]] = i + 1

            item = entry["item"]
            parsed["references"].append({
                "title": item.get("title", ""),
                "url": item.get("url", ""),
                "domain": item.get("domain", ""),
                "snippet": self._snippet(item.get("content"), 200),
                "is_fetched": entry["is_fetched"],
                "type": item.get("_type", "search"),
                # Real page screenshot for the Sources panel, when available.
                "raw_screenshot_b64": item.get("raw_screenshot_b64"),
                "images": item.get("images"),
            })

        # 5. Rewrite [N] markers in the body to the new indices; unknown IDs
        #    are left untouched.
        def repl(m):
            oid = int(m.group(1))
            return f"[{reindex_map[oid]}]" if oid in reindex_map else m.group(0)

        parsed["response"] = re.sub(r'\[(\d+)\]', repl, body_text).strip()
        return parsed

    def _build_stages_ui(self, trace: Dict[str, Any], context: StageContext, images: List[str]) -> List[Dict[str, Any]]:
        """Build the per-stage cards shown by the frontend.

        `images` is currently unused; kept for caller compatibility.
        """
        stages: List[Dict[str, Any]] = []

        # 1. Search results, deduplicated by URL, fetched pages first.
        search_refs: List[Dict[str, Any]] = []
        seen = set()
        for r in context.web_results:
            if r.get("_type") == "search" and r.get("url") not in seen:
                seen.add(r["url"])
                is_fetched = any(
                    p.get("url") == r["url"]
                    for p in context.web_results
                    if p.get("_type") == "page"
                )
                search_refs.append({
                    "title": r.get("title", ""),
                    "url": r["url"],
                    "snippet": self._snippet(r.get("content"), 100),
                    "is_fetched": is_fetched,
                })

        search_refs.sort(key=lambda x: x["is_fetched"], reverse=True)

        if search_refs:
            stages.append({
                "name": "Search",
                "model": "Web Search",
                "icon_config": "openai",
                "provider": "Web",
                "references": search_refs,
                "description": f"Found {len(search_refs)} results.",
            })

        # 2. Instruct rounds.
        for i, t in enumerate(trace.get("instruct_rounds", [])):
            stage_name = t.get("stage_name", f"Analysis {i+1}")
            tool_count = t.get("tool_calls", 0)
            desc = t.get("output", "")
            if tool_count > 0:
                # Tool activity is more informative than free-form reasoning text.
                desc = f"Executed {tool_count} tool calls."
            elif not desc:
                desc = "Processing..."

            usage = t.get("usage", {})
            stages.append({
                "name": stage_name,
                "model": t.get("model"),
                "icon_config": "google",
                "provider": "Instruct",
                "time": t.get("time", 0),
                "description": desc,
                "usage": usage,
                "cost": self._stage_cost(usage, "instruct"),
            })

        # 3. Summary.
        if trace.get("summary"):
            s = trace["summary"]
            usage = s.get("usage", {})
            stages.append({
                "name": "Summary",
                "model": s.get("model"),
                "icon_config": "google",
                "provider": "Summary",
                "time": s.get("time", 0),
                "description": "Generated final answer.",
                "usage": usage,
                "cost": self._stage_cost(usage, "main"),
            })

        return stages

    def _render_trace_markdown(self, trace: Dict[str, Any]) -> str:
        """Render the raw trace dict as a human-readable markdown debug dump."""
        parts = ["# Pipeline Trace\n"]
        if trace.get("instruct_rounds"):
            parts.append(f"## Instruct ({len(trace['instruct_rounds'])} rounds)\n")
            for i, r in enumerate(trace["instruct_rounds"]):
                name = r.get("stage_name", f"Round {i+1}")
                parts.append(f"### {name}\n" + str(r))
        if trace.get("summary"):
            parts.append("## Summary\n" + str(trace["summary"]))
        return "\n".join(parts)

    async def close(self):
        """Release the underlying search service. Best-effort: never raises."""
        try:
            await self.search_service.close()
        except Exception as e:
            # Narrowed from a bare `except: pass`; still best-effort, but the
            # failure is no longer invisible.
            logger.warning(f"Pipeline: close failed: {e}")