entari-plugin-hyw 4.0.0rc7__py3-none-any.whl → 4.0.0rc9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (114)
  1. entari_plugin_hyw/Untitled-1 +1865 -0
  2. entari_plugin_hyw/__init__.py +726 -394
  3. entari_plugin_hyw/history.py +26 -13
  4. entari_plugin_hyw/misc.py +3 -0
  5. entari_plugin_hyw/search_cache.py +154 -0
  6. {entari_plugin_hyw-4.0.0rc7.dist-info → entari_plugin_hyw-4.0.0rc9.dist-info}/METADATA +3 -1
  7. entari_plugin_hyw-4.0.0rc9.dist-info/RECORD +68 -0
  8. {entari_plugin_hyw-4.0.0rc7.dist-info → entari_plugin_hyw-4.0.0rc9.dist-info}/WHEEL +1 -1
  9. {entari_plugin_hyw-4.0.0rc7.dist-info → entari_plugin_hyw-4.0.0rc9.dist-info}/top_level.txt +1 -0
  10. hyw_core/__init__.py +94 -0
  11. hyw_core/browser_control/__init__.py +65 -0
  12. hyw_core/browser_control/assets/card-dist/index.html +409 -0
  13. hyw_core/browser_control/assets/index.html +5691 -0
  14. hyw_core/browser_control/engines/__init__.py +17 -0
  15. {entari_plugin_hyw/browser → hyw_core/browser_control}/engines/duckduckgo.py +42 -8
  16. {entari_plugin_hyw/browser → hyw_core/browser_control}/engines/google.py +1 -1
  17. {entari_plugin_hyw/browser → hyw_core/browser_control}/manager.py +15 -8
  18. entari_plugin_hyw/render_vue.py → hyw_core/browser_control/renderer.py +29 -14
  19. {entari_plugin_hyw/browser → hyw_core/browser_control}/service.py +340 -112
  20. hyw_core/config.py +154 -0
  21. hyw_core/core.py +322 -0
  22. hyw_core/definitions.py +83 -0
  23. entari_plugin_hyw/modular_pipeline.py → hyw_core/pipeline.py +121 -97
  24. {entari_plugin_hyw → hyw_core}/search.py +19 -14
  25. hyw_core/stages/__init__.py +21 -0
  26. entari_plugin_hyw/stage_base.py → hyw_core/stages/base.py +2 -2
  27. entari_plugin_hyw/stage_summary.py → hyw_core/stages/summary.py +34 -11
  28. entari_plugin_hyw/assets/card-dist/index.html +0 -387
  29. entari_plugin_hyw/browser/__init__.py +0 -10
  30. entari_plugin_hyw/browser/engines/bing.py +0 -95
  31. entari_plugin_hyw/card-ui/.gitignore +0 -24
  32. entari_plugin_hyw/card-ui/README.md +0 -5
  33. entari_plugin_hyw/card-ui/index.html +0 -16
  34. entari_plugin_hyw/card-ui/package-lock.json +0 -2342
  35. entari_plugin_hyw/card-ui/package.json +0 -31
  36. entari_plugin_hyw/card-ui/public/logos/anthropic.svg +0 -1
  37. entari_plugin_hyw/card-ui/public/logos/cerebras.svg +0 -9
  38. entari_plugin_hyw/card-ui/public/logos/deepseek.png +0 -0
  39. entari_plugin_hyw/card-ui/public/logos/gemini.svg +0 -1
  40. entari_plugin_hyw/card-ui/public/logos/google.svg +0 -1
  41. entari_plugin_hyw/card-ui/public/logos/grok.png +0 -0
  42. entari_plugin_hyw/card-ui/public/logos/huggingface.png +0 -0
  43. entari_plugin_hyw/card-ui/public/logos/microsoft.svg +0 -15
  44. entari_plugin_hyw/card-ui/public/logos/minimax.png +0 -0
  45. entari_plugin_hyw/card-ui/public/logos/mistral.png +0 -0
  46. entari_plugin_hyw/card-ui/public/logos/nvida.png +0 -0
  47. entari_plugin_hyw/card-ui/public/logos/openai.svg +0 -1
  48. entari_plugin_hyw/card-ui/public/logos/openrouter.png +0 -0
  49. entari_plugin_hyw/card-ui/public/logos/perplexity.svg +0 -24
  50. entari_plugin_hyw/card-ui/public/logos/qwen.png +0 -0
  51. entari_plugin_hyw/card-ui/public/logos/xai.png +0 -0
  52. entari_plugin_hyw/card-ui/public/logos/xiaomi.png +0 -0
  53. entari_plugin_hyw/card-ui/public/logos/zai.png +0 -0
  54. entari_plugin_hyw/card-ui/public/vite.svg +0 -1
  55. entari_plugin_hyw/card-ui/src/App.vue +0 -787
  56. entari_plugin_hyw/card-ui/src/assets/vue.svg +0 -1
  57. entari_plugin_hyw/card-ui/src/components/HelloWorld.vue +0 -41
  58. entari_plugin_hyw/card-ui/src/components/MarkdownContent.vue +0 -382
  59. entari_plugin_hyw/card-ui/src/components/SectionCard.vue +0 -41
  60. entari_plugin_hyw/card-ui/src/components/StageCard.vue +0 -240
  61. entari_plugin_hyw/card-ui/src/main.ts +0 -5
  62. entari_plugin_hyw/card-ui/src/style.css +0 -29
  63. entari_plugin_hyw/card-ui/src/test_regex.js +0 -103
  64. entari_plugin_hyw/card-ui/src/types.ts +0 -61
  65. entari_plugin_hyw/card-ui/tsconfig.app.json +0 -16
  66. entari_plugin_hyw/card-ui/tsconfig.json +0 -7
  67. entari_plugin_hyw/card-ui/tsconfig.node.json +0 -26
  68. entari_plugin_hyw/card-ui/vite.config.ts +0 -16
  69. entari_plugin_hyw/definitions.py +0 -174
  70. entari_plugin_hyw/stage_instruct.py +0 -355
  71. entari_plugin_hyw/stage_instruct_deepsearch.py +0 -104
  72. entari_plugin_hyw/stage_vision.py +0 -113
  73. entari_plugin_hyw-4.0.0rc7.dist-info/RECORD +0 -102
  74. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/anthropic.svg +0 -0
  75. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/cerebras.svg +0 -0
  76. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/deepseek.png +0 -0
  77. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/gemini.svg +0 -0
  78. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/google.svg +0 -0
  79. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/grok.png +0 -0
  80. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/huggingface.png +0 -0
  81. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/microsoft.svg +0 -0
  82. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/minimax.png +0 -0
  83. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/mistral.png +0 -0
  84. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/nvida.png +0 -0
  85. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/openai.svg +0 -0
  86. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/openrouter.png +0 -0
  87. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/perplexity.svg +0 -0
  88. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/qwen.png +0 -0
  89. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/xai.png +0 -0
  90. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/xiaomi.png +0 -0
  91. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/logos/zai.png +0 -0
  92. {entari_plugin_hyw → hyw_core/browser_control}/assets/card-dist/vite.svg +0 -0
  93. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/anthropic.svg +0 -0
  94. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/cerebras.svg +0 -0
  95. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/deepseek.png +0 -0
  96. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/gemini.svg +0 -0
  97. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/google.svg +0 -0
  98. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/grok.png +0 -0
  99. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/huggingface.png +0 -0
  100. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/microsoft.svg +0 -0
  101. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/minimax.png +0 -0
  102. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/mistral.png +0 -0
  103. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/nvida.png +0 -0
  104. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/openai.svg +0 -0
  105. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/openrouter.png +0 -0
  106. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/perplexity.svg +0 -0
  107. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/qwen.png +0 -0
  108. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/xai.png +0 -0
  109. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/xiaomi.png +0 -0
  110. {entari_plugin_hyw/assets/icon → hyw_core/browser_control/assets/logos}/zai.png +0 -0
  111. {entari_plugin_hyw/browser → hyw_core/browser_control}/engines/base.py +0 -0
  112. {entari_plugin_hyw/browser → hyw_core/browser_control}/engines/default.py +0 -0
  113. {entari_plugin_hyw/browser → hyw_core/browser_control}/landing.html +0 -0
  114. {entari_plugin_hyw → hyw_core}/image_cache.py +0 -0
@@ -7,16 +7,15 @@ Simpler flow with self-correction/feedback loop.
7
7
 
8
8
  import asyncio
9
9
  import time
10
+ import re
10
11
  from typing import Any, Dict, List, Optional, Callable, Awaitable
11
12
 
12
13
  from loguru import logger
13
14
  from openai import AsyncOpenAI
14
15
 
15
- from .stage_base import StageContext
16
- from .stage_instruct import InstructStage
17
- from .stage_instruct_deepsearch import InstructDeepsearchStage
18
- from .stage_summary import SummaryStage
19
- from .stage_vision import VisionStage
16
+ from .stages.base import StageContext, StageResult
17
+ from .stages.base import StageContext, StageResult, BaseStage
18
+ from .stages.summary import SummaryStage
20
19
  from .search import SearchService
21
20
 
22
21
 
@@ -25,27 +24,31 @@ class ModularPipeline:
25
24
  Modular Pipeline.
26
25
 
27
26
  Flow:
28
- 1. Instruct: Initial Discovery + Mode Decision (fast/deepsearch).
29
- 2. [Deepsearch only] Instruct Deepsearch Loop: Supplement info (max 3 iterations).
30
- 3. Summary: Generate final response.
27
+ 1. Input Analysis:
28
+ - If Images -> Skip Search -> Summary
29
+ - If Text -> Execute Search (or URL fetch) -> Summary
30
+ 2. Summary: Generate final response.
31
31
  """
32
32
 
33
- def __init__(self, config: Any, send_func: Optional[Callable[[str], Awaitable[None]]] = None):
33
+ def __init__(self, config: Any, search_service: SearchService, send_func: Optional[Callable[[str], Awaitable[None]]] = None):
34
34
  self.config = config
35
35
  self.send_func = send_func
36
- self.search_service = SearchService(config)
36
+ self.search_service = search_service
37
37
  self.client = AsyncOpenAI(base_url=config.base_url, api_key=config.api_key)
38
38
 
39
39
  # Initialize stages
40
- self.instruct_stage = InstructStage(config, self.search_service, self.client, send_func=send_func)
41
- self.instruct_deepsearch_stage = InstructDeepsearchStage(config, self.search_service, self.client)
42
40
  self.summary_stage = SummaryStage(config, self.search_service, self.client)
43
- self.vision_stage = VisionStage(config, self.search_service, self.client)
44
41
 
45
- def _has_vision_model(self) -> bool:
46
- """Check if a vision model is configured."""
47
- vision_cfg = self.config.get_model_config("vision")
48
- return bool(vision_cfg.get("model_name"))
42
+ @property
43
+ def _send_func(self) -> Optional[Callable[[str], Awaitable[None]]]:
44
+ """Getter for _send_func (alias for send_func)."""
45
+ return self.send_func
46
+
47
+ @_send_func.setter
48
+ def _send_func(self, value: Optional[Callable[[str], Awaitable[None]]]):
49
+ """Setter for _send_func - updates send_func."""
50
+ self.send_func = value
51
+
49
52
 
50
53
  async def execute(
51
54
  self,
@@ -53,8 +56,6 @@ class ModularPipeline:
53
56
  conversation_history: List[Dict],
54
57
  model_name: str = None,
55
58
  images: List[str] = None,
56
- vision_model_name: str = None,
57
- selected_vision_model: str = None,
58
59
  ) -> Dict[str, Any]:
59
60
  """Execute the modular pipeline."""
60
61
  start_time = time.time()
@@ -63,7 +64,7 @@ class ModularPipeline:
63
64
  active_model = model_name or self.config.model_name
64
65
  if not active_model:
65
66
  # Fallback to instruct model for logging/context
66
- active_model = self.config.get_model_config("instruct").get("model_name")
67
+ active_model = self.config.get_model_config("instruct").model_name
67
68
 
68
69
  context = StageContext(
69
70
  user_input=user_input,
@@ -89,59 +90,92 @@ class ModularPipeline:
89
90
  try:
90
91
  logger.info(f"Pipeline: Processing '{user_input[:30]}...'")
91
92
 
92
- # === Stage 0: Vision (if images and vision model configured) ===
93
- if images and self._has_vision_model():
94
- logger.info("Pipeline: Stage 0 - Vision (generating image description)")
95
- vision_result = await self.vision_stage.execute(context, images)
93
+ # === Image-First Logic ===
94
+ # When user provides images, skip search and go directly to Summary
95
+ # Images will be passed through to the Summary stage
96
+ has_user_images = bool(images)
97
+ if has_user_images:
98
+ logger.info(f"Pipeline: {len(images)} user image(s) detected. Skipping search -> Instruct.")
99
+
100
+ # === Search-First Logic (only when no images) ===
101
+ # 1. URL Detection
102
+ # Updated to capture full URLs including queries and paths
103
+ url_pattern = re.compile(r'https?://(?:[-\w./?=&%#]+)')
104
+ found_urls = url_pattern.findall(user_input)
105
+
106
+ hit_content = False
107
+
108
+ # Skip URL fetch and search if user provided images or long query
109
+ is_long_query = len(user_input) > 20
110
+ if has_user_images:
111
+ hit_content = False # Force into Instruct path
112
+ elif is_long_query:
113
+ logger.info(f"Pipeline: Long query ({len(user_input)} chars). Skipping direct search/fetch -> Instruct.")
114
+ hit_content = False
115
+ elif found_urls:
116
+ logger.info(f"Pipeline: Detected {len(found_urls)} URLs. Executing direct fetch...")
117
+ # Fetch pages (borrowing logic from InstructStage's batch fetch would be ideal,
118
+ # but we'll use search_service directly and simulate what Instruct did for context)
96
119
 
97
- if vision_result.success and vision_result.data.get("description"):
98
- context.vision_description = vision_result.data["description"]
99
- logger.info(f"Pipeline: Vision description generated ({len(context.vision_description)} chars)")
100
-
101
- # Add vision trace
102
- trace["vision"] = vision_result.trace
103
- usage_totals["input_tokens"] += vision_result.usage.get("input_tokens", 0)
104
- usage_totals["output_tokens"] += vision_result.usage.get("output_tokens", 0)
105
-
106
- # Clear images since we have the description now
107
- # (don't pass raw images to later stages when using vision model)
108
- images = []
109
-
110
- # === Stage 1: Instruct (Initial Discovery) ===
111
- logger.info("Pipeline: Stage 1 - Instruct")
112
- instruct_result = await self.instruct_stage.execute(context)
113
-
114
- # Trace & Usage
115
- instruct_result.trace["stage_name"] = "Instruct (Round 1)"
116
- trace["instruct_rounds"].append(instruct_result.trace)
117
- usage_totals["input_tokens"] += instruct_result.usage.get("input_tokens", 0)
118
- usage_totals["output_tokens"] += instruct_result.usage.get("output_tokens", 0)
119
-
120
- # Check refuse
121
- if context.should_refuse:
122
- return self._build_refusal_response(context, conversation_history, active_model, stats)
120
+ # Fetch
121
+ fetch_results = await self.search_service.fetch_pages_batch(found_urls)
122
+
123
+ # Pre-render screenshots if needed (similar to InstructStage logic)
124
+ # For brevity/cleanliness, assuming fetch_pages_batch returns what we need or we process it.
125
+ # Ideally we want screenshots for the UI. The service.fetch_page usually returns raw data.
126
+ # We need to render them if we want screenshots.
127
+ # To keep it simple for this file, we'll skip complex screenshot rendering here OR
128
+ # we rely on the summary stage to just use the text.
129
+ # But the user logic implies "Search/Fetch Hit -> Summary".
130
+
131
+ # Let's populate context.web_results
132
+ for i, page_data in enumerate(fetch_results):
133
+ if page_data.get("content"):
134
+ hit_content = True
135
+ context.web_results.append({
136
+ "_id": context.next_id(),
137
+ "_type": "page",
138
+ "title": page_data.get("title", "Page"),
139
+ "url": page_data.get("url", found_urls[i]),
140
+ "content": page_data.get("content", ""),
141
+ "images": page_data.get("images", []),
142
+ # For now, no screenshot unless we call renderer.
143
+ # If critical, we can add it later.
144
+ })
123
145
 
124
- # === Stage 2: Deepsearch Loop (if mode is deepsearch) ===
125
- if context.selected_mode == "deepsearch":
126
- MAX_DEEPSEARCH_ITERATIONS = 3
127
- logger.info(f"Pipeline: Mode is 'deepsearch', starting loop (max {MAX_DEEPSEARCH_ITERATIONS} iterations)")
146
+ # 2. Search (if no URLs or just always try search if simple query?)
147
+ # The prompt says: "judging result quantity > 0".
148
+ if not hit_content and not has_user_images and not is_long_query and user_input.strip():
149
+ logger.info("Pipeline: No URLs found or fetched. Executing direct search...")
150
+ search_start = time.time()
151
+ search_results = await self.search_service.search(user_input)
152
+ context.search_time = time.time() - search_start
128
153
 
129
- for i in range(MAX_DEEPSEARCH_ITERATIONS):
130
- logger.info(f"Pipeline: Stage 2 - Deepsearch Iteration {i + 1}")
131
- deepsearch_result = await self.instruct_deepsearch_stage.execute(context)
132
-
133
- # Trace & Usage
134
- deepsearch_result.trace["stage_name"] = f"Deepsearch (Iteration {i + 1})"
135
- trace["instruct_rounds"].append(deepsearch_result.trace)
136
- usage_totals["input_tokens"] += deepsearch_result.usage.get("input_tokens", 0)
137
- usage_totals["output_tokens"] += deepsearch_result.usage.get("output_tokens", 0)
138
-
139
- # Check if should stop
140
- if deepsearch_result.data.get("should_stop"):
141
- logger.info(f"Pipeline: Deepsearch loop ended at iteration {i + 1}")
142
- break
143
- else:
144
- logger.info("Pipeline: Mode is 'fast', skipping deepsearch stage")
154
+ # Filter out the raw debug page
155
+ valid_results = [r for r in search_results if not r.get("_hidden")]
156
+
157
+ if valid_results:
158
+ logger.info(f"Pipeline: Search found {len(valid_results)} results in {context.search_time:.2f}s. Proceeding to Summary.")
159
+ hit_content = True
160
+ for item in search_results: # Add all, including hidden debug ones if needed by history
161
+ item["_id"] = context.next_id()
162
+ if "_type" not in item: item["_type"] = "search"
163
+ item["query"] = user_input
164
+ context.web_results.append(item)
165
+ else:
166
+ logger.info("Pipeline: Search yielded 0 results.")
167
+
168
+ # === Branching ===
169
+ if hit_content and not has_user_images:
170
+ # -> Summary Stage (search/URL results available)
171
+ logger.info("Pipeline: Content found (URL/Search). Proceeding to Summary.")
172
+
173
+ # If no content was found and no images, we still proceed to Summary but with empty context (Direct Chat)
174
+ # If images, we proceed to Summary with images.
175
+
176
+ # Refusal check from search results? (Unlikely, but good to keep in mind)
177
+ pass
178
+
145
179
 
146
180
  # === Parallel Execution: Summary Generation + Image Prefetching ===
147
181
  # We run image prefetching concurrently with Summary generation to save time.
@@ -169,6 +203,9 @@ class ModularPipeline:
169
203
  if r.get("_type") == "page" and r.get("screenshot_b64"):
170
204
  summary_input_images.append(r["screenshot_b64"])
171
205
 
206
+ if context.should_refuse:
207
+ return StageResult(success=True, data={"content": "Refused"}, usage={}, trace={}), 0.0
208
+
172
209
  res = await self.summary_stage.execute(
173
210
  context,
174
211
  images=summary_input_images if summary_input_images else None
@@ -204,6 +241,10 @@ class ModularPipeline:
204
241
  summary_result, summary_time = await summary_task
205
242
  cached_map, prefetch_time = await prefetch_task
206
243
 
244
+ if context.should_refuse:
245
+ # Double check if summary triggered refusal
246
+ return self._build_refusal_response(context, conversation_history, active_model, stats)
247
+
207
248
  time_diff = abs(summary_time - prefetch_time)
208
249
  if summary_time > prefetch_time:
209
250
  logger.info(f"Pipeline: Image Prefetch finished first ({prefetch_time:.2f}s). Summary took {summary_time:.2f}s. (Waited {time_diff:.2f}s for Summary)")
@@ -268,7 +309,8 @@ class ModularPipeline:
268
309
  },
269
310
  "stages_used": stages_used,
270
311
  "web_results": context.web_results,
271
- "vision_trace": trace.get("vision"),
312
+ "trace": trace,
313
+
272
314
  "instruct_traces": trace.get("instruct_rounds", []),
273
315
  }
274
316
 
@@ -378,6 +420,8 @@ class ModularPipeline:
378
420
  # Sort: Fetched first
379
421
  search_refs.sort(key=lambda x: x["is_fetched"], reverse=True)
380
422
 
423
+ logger.debug(f"_build_stages_ui: Found {len(search_refs)} search refs from {len(context.web_results)} web_results")
424
+
381
425
  if search_refs:
382
426
  stages.append({
383
427
  "name": "Search",
@@ -385,30 +429,10 @@ class ModularPipeline:
385
429
  "icon_config": "openai",
386
430
  "provider": "Web",
387
431
  "references": search_refs,
388
- "description": f"Found {len(search_refs)} results."
432
+ "description": f"Found {len(search_refs)} results.",
433
+ "time": getattr(context, 'search_time', 0)
389
434
  })
390
435
 
391
- # 2. Vision Stage (if used)
392
- if trace.get("vision"):
393
- v = trace["vision"]
394
- if not v.get("skipped"):
395
- usage = v.get("usage", {})
396
- vision_cfg = self.config.get_model_config("vision")
397
- input_price = vision_cfg.get("input_price") or 0
398
- output_price = vision_cfg.get("output_price") or 0
399
- cost = (usage.get("input_tokens", 0) * input_price + usage.get("output_tokens", 0) * output_price) / 1_000_000
400
-
401
- stages.append({
402
- "name": "Vision",
403
- "model": v.get("model"),
404
- "icon_config": "google",
405
- "provider": "Vision",
406
- "time": v.get("time", 0),
407
- "description": f"Analyzed {v.get('images_count', 0)} image(s).",
408
- "usage": usage,
409
- "cost": cost
410
- })
411
-
412
436
  # 2. Instruct Rounds
413
437
  for i, t in enumerate(trace.get("instruct_rounds", [])):
414
438
  stage_name = t.get("stage_name", f"Analysis {i+1}")
@@ -424,8 +448,8 @@ class ModularPipeline:
424
448
  # Calculate cost from config prices
425
449
  usage = t.get("usage", {})
426
450
  instruct_cfg = self.config.get_model_config("instruct")
427
- input_price = instruct_cfg.get("input_price") or 0
428
- output_price = instruct_cfg.get("output_price") or 0
451
+ input_price = instruct_cfg.input_price or 0
452
+ output_price = instruct_cfg.output_price or 0
429
453
  cost = (usage.get("input_tokens", 0) * input_price + usage.get("output_tokens", 0) * output_price) / 1_000_000
430
454
 
431
455
  stages.append({
@@ -444,8 +468,8 @@ class ModularPipeline:
444
468
  s = trace["summary"]
445
469
  usage = s.get("usage", {})
446
470
  main_cfg = self.config.get_model_config("main")
447
- input_price = main_cfg.get("input_price") or 0
448
- output_price = main_cfg.get("output_price") or 0
471
+ input_price = main_cfg.input_price or 0
472
+ output_price = main_cfg.output_price or 0
449
473
  cost = (usage.get("input_tokens", 0) * input_price + usage.get("output_tokens", 0) * output_price) / 1_000_000
450
474
 
451
475
  stages.append({
@@ -5,12 +5,11 @@ import time
5
5
  from typing import List, Dict, Any, Optional
6
6
  from loguru import logger
7
7
 
8
- from .browser.service import get_screenshot_service
9
- # New engines
10
- from .browser.engines.bing import BingEngine
11
- from .browser.engines.duckduckgo import DuckDuckGoEngine
12
- from .browser.engines.google import GoogleEngine
13
- from .browser.engines.default import DefaultEngine
8
+ from .browser_control.service import get_screenshot_service
9
+ # Search engines from browser_control subpackage
10
+ from .browser_control.engines.duckduckgo import DuckDuckGoEngine
11
+ from .browser_control.engines.google import GoogleEngine
12
+ from .browser_control.engines.default import DefaultEngine
14
13
 
15
14
  class SearchService:
16
15
  def __init__(self, config: Any):
@@ -27,16 +26,14 @@ class SearchService:
27
26
  if self._engine_name:
28
27
  self._engine_name = self._engine_name.lower()
29
28
 
30
- if self._engine_name == "bing":
31
- self._engine = BingEngine()
32
- elif self._engine_name == "google":
29
+ if self._engine_name == "google":
33
30
  self._engine = GoogleEngine()
34
- elif self._engine_name == "duckduckgo":
35
- self._engine = DuckDuckGoEngine()
31
+ elif self._engine_name == "default_address_bar": # Explicitly requested address bar capability if needed
32
+ self._engine = DefaultEngine()
36
33
  else:
37
- # Default: use browser address bar search (Google-based)
38
- self._engine = DefaultEngine()
39
- self._engine_name = "default"
34
+ # Default: use DuckDuckGo
35
+ self._engine = DuckDuckGoEngine()
36
+ self._engine_name = "duckduckgo"
40
37
 
41
38
  logger.info(f"SearchService initialized with engine: {self._engine_name}")
42
39
 
@@ -156,3 +153,11 @@ class SearchService:
156
153
  timeout = self._fetch_timeout
157
154
  service = get_screenshot_service(headless=self._headless)
158
155
  return await service.fetch_page(url, timeout=timeout, include_screenshot=include_screenshot)
156
+
157
+ async def screenshot_url(self, url: str, full_page: bool = True) -> Optional[str]:
158
+ """
159
+ Capture a screenshot of a URL.
160
+ Delegates to screenshot service.
161
+ """
162
+ service = get_screenshot_service(headless=self._headless)
163
+ return await service.screenshot_url(url, full_page=full_page)
@@ -0,0 +1,21 @@
1
+ """
2
+ hyw_core.stages - Pipeline Stages
3
+
4
+ This subpackage provides the pipeline stage implementations:
5
+ - BaseStage: Abstract base class for all stages
6
+ - StageContext: Shared context between stages
7
+ - StageResult: Stage execution result
8
+ - InstructStage: Initial task planning and search execution
9
+ - SummaryStage: Final response generation
10
+ """
11
+
12
+ from .base import BaseStage, StageContext, StageResult
13
+
14
+ from .summary import SummaryStage
15
+
16
+ __all__ = [
17
+ "BaseStage",
18
+ "StageContext",
19
+ "StageResult",
20
+ "SummaryStage",
21
+ ]
@@ -39,8 +39,8 @@ class StageContext:
39
39
  # Model capabilities
40
40
  image_input_supported: bool = True
41
41
 
42
- # Vision description (from VisionStage)
43
- vision_description: str = ""
42
+ # Search timing
43
+ search_time: float = 0.0
44
44
 
45
45
  def next_id(self) -> int:
46
46
  """Get next global ID."""
@@ -12,8 +12,8 @@ from typing import Any, Dict, List, Optional
12
12
  from loguru import logger
13
13
  from openai import AsyncOpenAI
14
14
 
15
- from .stage_base import BaseStage, StageContext, StageResult
16
- from .definitions import SUMMARY_REPORT_SP
15
+ from .base import BaseStage, StageContext, StageResult
16
+ from ..definitions import SUMMARY_REPORT_SP, get_refuse_answer_tool
17
17
 
18
18
 
19
19
  class SummaryStage(BaseStage):
@@ -35,6 +35,9 @@ class SummaryStage(BaseStage):
35
35
 
36
36
  # Format context from web results
37
37
  web_content = self._format_web_content(context)
38
+
39
+ # Tools
40
+ refuse_tool = get_refuse_answer_tool()
38
41
  full_context = f"{context.agent_context}\n\n{web_content}"
39
42
 
40
43
  # Select prompt
@@ -47,15 +50,13 @@ class SummaryStage(BaseStage):
47
50
  # Build Context Message
48
51
  context_message = f"## Web Search & Page Content\n\n```context\n{full_context}\n```"
49
52
 
50
- # Add vision description if present (from VisionStage)
51
- if context.vision_description:
52
- vision_context = f"## 用户图片描述\n\n{context.vision_description}"
53
- context_message = f"{vision_context}\n\n{context_message}"
54
53
 
55
54
  # Build user content
56
55
  user_text = context.user_input or "..."
57
56
  if images:
58
- user_content: List[Dict[str, Any]] = [{"type": "text", "text": user_text}]
57
+ # Add image context message for multimodal input
58
+ image_context = f"[System: The user has provided {len(images)} image(s). Please analyze these images together with the text query to provide a comprehensive response.]"
59
+ user_content: List[Dict[str, Any]] = [{"type": "text", "text": f"{image_context}\n\n{user_text}"}]
59
60
  for img_b64 in images:
60
61
  url = f"data:image/jpeg;base64,{img_b64}" if not img_b64.startswith("data:") else img_b64
61
62
  user_content.append({"type": "image_url", "image_url": {"url": url}})
@@ -72,18 +73,21 @@ class SummaryStage(BaseStage):
72
73
  model_cfg = self.config.get_model_config("main")
73
74
 
74
75
  client = self._client_for(
75
- api_key=model_cfg.get("api_key"),
76
- base_url=model_cfg.get("base_url")
76
+ api_key=model_cfg.api_key,
77
+ base_url=model_cfg.base_url
77
78
  )
78
79
 
79
- model = model_cfg.get("model_name") or self.config.model_name
80
+ model = model_cfg.model_name or self.config.model_name
80
81
 
81
82
  try:
82
83
  response = await client.chat.completions.create(
83
84
  model=model,
84
85
  messages=messages,
85
86
  temperature=self.config.temperature,
87
+
86
88
  extra_body=getattr(self.config, "summary_extra_body", None),
89
+ tools=[refuse_tool],
90
+ tool_choice="auto",
87
91
  )
88
92
  except Exception as e:
89
93
  logger.error(f"SummaryStage LLM error: {e}")
@@ -98,6 +102,25 @@ class SummaryStage(BaseStage):
98
102
  usage["input_tokens"] = getattr(response.usage, "prompt_tokens", 0) or 0
99
103
  usage["output_tokens"] = getattr(response.usage, "completion_tokens", 0) or 0
100
104
 
105
+ # Handle Tool Calls (Refusal)
106
+ tool_calls = response.choices[0].message.tool_calls
107
+ if tool_calls:
108
+ for tc in tool_calls:
109
+ if tc.function.name == "refuse_answer":
110
+ import json
111
+ try:
112
+ args = json.loads(tc.function.arguments)
113
+ reason = args.get("reason", "Refused")
114
+ context.should_refuse = True
115
+ context.refuse_reason = reason
116
+ return StageResult(
117
+ success=True,
118
+ data={"content": f"Refused: {reason}"},
119
+ usage=usage,
120
+ trace={"skipped": True, "reason": reason}
121
+ )
122
+ except: pass
123
+
101
124
  content = (response.choices[0].message.content or "").strip()
102
125
 
103
126
  return StageResult(
@@ -106,7 +129,7 @@ class SummaryStage(BaseStage):
106
129
  usage=usage,
107
130
  trace={
108
131
  "model": model,
109
- "provider": model_cfg.get("model_provider") or "Unknown",
132
+ "provider": model_cfg.model_provider or "Unknown",
110
133
  "usage": usage,
111
134
  "system_prompt": system_prompt,
112
135
  "context_message": context_message, # Includes vision description + search results