entari-plugin-hyw 0.3.5__py3-none-any.whl → 4.0.0rc14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of entari-plugin-hyw might be problematic. Click here for more details.
- entari_plugin_hyw/Untitled-1 +1865 -0
- entari_plugin_hyw/__init__.py +979 -116
- entari_plugin_hyw/filters.py +83 -0
- entari_plugin_hyw/history.py +251 -0
- entari_plugin_hyw/misc.py +214 -0
- entari_plugin_hyw/search_cache.py +154 -0
- entari_plugin_hyw-4.0.0rc14.dist-info/METADATA +118 -0
- entari_plugin_hyw-4.0.0rc14.dist-info/RECORD +72 -0
- {entari_plugin_hyw-0.3.5.dist-info → entari_plugin_hyw-4.0.0rc14.dist-info}/WHEEL +1 -1
- {entari_plugin_hyw-0.3.5.dist-info → entari_plugin_hyw-4.0.0rc14.dist-info}/top_level.txt +1 -0
- hyw_core/__init__.py +94 -0
- hyw_core/agent.py +768 -0
- hyw_core/browser_control/__init__.py +63 -0
- hyw_core/browser_control/assets/card-dist/index.html +425 -0
- hyw_core/browser_control/assets/card-dist/logos/anthropic.svg +1 -0
- hyw_core/browser_control/assets/card-dist/logos/cerebras.svg +9 -0
- hyw_core/browser_control/assets/card-dist/logos/deepseek.png +0 -0
- hyw_core/browser_control/assets/card-dist/logos/gemini.svg +1 -0
- hyw_core/browser_control/assets/card-dist/logos/google.svg +1 -0
- hyw_core/browser_control/assets/card-dist/logos/grok.png +0 -0
- hyw_core/browser_control/assets/card-dist/logos/huggingface.png +0 -0
- hyw_core/browser_control/assets/card-dist/logos/microsoft.svg +15 -0
- hyw_core/browser_control/assets/card-dist/logos/minimax.png +0 -0
- hyw_core/browser_control/assets/card-dist/logos/mistral.png +0 -0
- hyw_core/browser_control/assets/card-dist/logos/nvida.png +0 -0
- hyw_core/browser_control/assets/card-dist/logos/openai.svg +1 -0
- hyw_core/browser_control/assets/card-dist/logos/openrouter.png +0 -0
- hyw_core/browser_control/assets/card-dist/logos/perplexity.svg +24 -0
- hyw_core/browser_control/assets/card-dist/logos/qwen.png +0 -0
- hyw_core/browser_control/assets/card-dist/logos/xai.png +0 -0
- hyw_core/browser_control/assets/card-dist/logos/xiaomi.png +0 -0
- hyw_core/browser_control/assets/card-dist/logos/zai.png +0 -0
- hyw_core/browser_control/assets/card-dist/vite.svg +1 -0
- hyw_core/browser_control/assets/index.html +5691 -0
- hyw_core/browser_control/assets/logos/anthropic.svg +1 -0
- hyw_core/browser_control/assets/logos/cerebras.svg +9 -0
- hyw_core/browser_control/assets/logos/deepseek.png +0 -0
- hyw_core/browser_control/assets/logos/gemini.svg +1 -0
- hyw_core/browser_control/assets/logos/google.svg +1 -0
- hyw_core/browser_control/assets/logos/grok.png +0 -0
- hyw_core/browser_control/assets/logos/huggingface.png +0 -0
- hyw_core/browser_control/assets/logos/microsoft.svg +15 -0
- hyw_core/browser_control/assets/logos/minimax.png +0 -0
- hyw_core/browser_control/assets/logos/mistral.png +0 -0
- hyw_core/browser_control/assets/logos/nvida.png +0 -0
- hyw_core/browser_control/assets/logos/openai.svg +1 -0
- hyw_core/browser_control/assets/logos/openrouter.png +0 -0
- hyw_core/browser_control/assets/logos/perplexity.svg +24 -0
- hyw_core/browser_control/assets/logos/qwen.png +0 -0
- hyw_core/browser_control/assets/logos/xai.png +0 -0
- hyw_core/browser_control/assets/logos/xiaomi.png +0 -0
- hyw_core/browser_control/assets/logos/zai.png +0 -0
- hyw_core/browser_control/engines/__init__.py +15 -0
- hyw_core/browser_control/engines/base.py +13 -0
- hyw_core/browser_control/engines/default.py +166 -0
- hyw_core/browser_control/engines/duckduckgo.py +171 -0
- hyw_core/browser_control/landing.html +172 -0
- hyw_core/browser_control/manager.py +173 -0
- hyw_core/browser_control/renderer.py +446 -0
- hyw_core/browser_control/service.py +940 -0
- hyw_core/config.py +154 -0
- hyw_core/core.py +462 -0
- hyw_core/crawling/__init__.py +18 -0
- hyw_core/crawling/completeness.py +437 -0
- hyw_core/crawling/models.py +88 -0
- hyw_core/definitions.py +104 -0
- hyw_core/image_cache.py +274 -0
- hyw_core/pipeline.py +502 -0
- hyw_core/search.py +171 -0
- hyw_core/stages/__init__.py +21 -0
- hyw_core/stages/base.py +95 -0
- hyw_core/stages/summary.py +191 -0
- entari_plugin_hyw/agent.py +0 -419
- entari_plugin_hyw/compressor.py +0 -59
- entari_plugin_hyw/tools.py +0 -236
- entari_plugin_hyw/vision.py +0 -35
- entari_plugin_hyw-0.3.5.dist-info/METADATA +0 -112
- entari_plugin_hyw-0.3.5.dist-info/RECORD +0 -9
hyw_core/config.py
ADDED
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
"""
|
|
2
|
+
hyw_core.config - Configuration Management
|
|
3
|
+
|
|
4
|
+
Provides standalone configuration for hyw-core with optional passthrough from parent packages.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
from dataclasses import dataclass, field
|
|
8
|
+
from typing import Dict, List, Any, Optional
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class ModelConfig:
    """Settings bundle describing a single model.

    Every field is optional and defaults to ``None`` except
    ``image_input``, which defaults to ``True``.
    """

    # Identity and endpoint of the model.
    model_name: Optional[str] = None
    api_key: Optional[str] = None
    base_url: Optional[str] = None

    # Extra request-body mapping; presumably forwarded to the LLM client
    # as-is -- confirm against the caller.
    extra_body: Optional[Dict[str, Any]] = None
    model_provider: Optional[str] = None

    # Pricing values; ``HywCore.query`` divides tokens * price by 1_000_000.
    input_price: Optional[float] = None
    output_price: Optional[float] = None

    # Image-input flag; enabled by default.
    image_input: bool = True
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class HywCoreConfig:
    """
    Core configuration for hyw-core.

    Can be used standalone or with passthrough from parent packages.

    Usage:
        # Standalone from YAML
        config = HywCoreConfig.from_yaml("config.yaml")

        # Passthrough from parent
        config = HywCoreConfig.from_dict({
            "model_name": parent_config.model_name,
            "api_key": parent_config.api_key,
            ...
        })
    """

    # LLM Configuration
    models: List[Dict[str, Any]] = field(default_factory=list)
    model_name: str = ""
    api_key: str = ""
    base_url: str = ""
    temperature: float = 0.4

    # Stage-specific model overrides (consulted by get_model_config)
    instruct_model: Optional[str] = None
    instruct_api_key: Optional[str] = None
    instruct_base_url: Optional[str] = None
    instruct_extra_body: Optional[Dict[str, Any]] = None

    summary_model: Optional[str] = None
    summary_api_key: Optional[str] = None
    summary_base_url: Optional[str] = None
    summary_extra_body: Optional[Dict[str, Any]] = None

    # Search Configuration
    search_engine: str = "duckduckgo"
    search_limit: int = 10
    blocked_domains: List[str] = field(default_factory=list)

    # Browser Configuration
    headless: bool = True
    fetch_timeout: float = 20.0

    # Output Configuration
    language: str = "Simplified Chinese"
    theme_color: str = "#ef4444"

    # Pricing (for cost estimation)
    input_price: float = 0.0
    output_price: float = 0.0

    @classmethod
    def from_dict(cls, data: Dict[str, Any]) -> "HywCoreConfig":
        """
        Create config from dictionary.

        Used for passthrough from parent packages. Unknown keys are
        ignored so callers can pass a superset of the known fields.

        Keys whose value is ``None`` (for example a YAML key left blank)
        are ignored as well, so the field keeps its declared default
        instead of being overwritten with ``None``.

        Args:
            data: Mapping of field names to values; ``None`` or an empty
                mapping yields an all-defaults config.

        Returns:
            A new HywCoreConfig instance.
        """
        import dataclasses

        field_names = {f.name for f in dataclasses.fields(cls)}
        filtered_data = {
            key: value
            for key, value in (data or {}).items()
            if key in field_names and value is not None
        }
        return cls(**filtered_data)

    @classmethod
    def from_yaml(cls, path: str) -> "HywCoreConfig":
        """
        Load config from YAML file.

        Used for standalone usage.

        Args:
            path: Path of the YAML file, read as UTF-8.

        Returns:
            A new HywCoreConfig built through from_dict; an empty
            document yields an all-defaults config.
        """
        import yaml

        with open(path, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f) or {}
        return cls.from_dict(data)

    def get_model_config(self, stage: str) -> "ModelConfig":
        """
        Get resolved model config for a stage.

        Each setting is looked up under the stage's prefixes in order and
        falls back to the un-prefixed root attribute. Falsy values (None,
        empty string, empty dict) count as "not set".

        Args:
            stage: "instruct", "qa", or "main" (summary). Any other value
                is treated like "main".

        Returns:
            ModelConfig with resolved settings
        """
        # Prefixes to try, most specific first.
        if stage == "instruct":
            prefixes = ("instruct_",)
        elif stage == "qa":
            # "qa" falls back to the instruct-stage settings.
            prefixes = ("qa_", "instruct_")
        else:  # "main" / summary
            prefixes = ("summary_",)

        def resolve(field_name: str):
            """Resolve a field with fallback: Primary -> Secondary -> Root."""
            for prefix in prefixes:
                val = getattr(self, f"{prefix}{field_name}", None)
                if val:
                    return val
            # Fallback to root
            return getattr(self, field_name, None)

        return ModelConfig(
            # Stage overrides are named "<stage>_model" while the root
            # field is "model_name", hence the two lookups.
            model_name=resolve("model") or resolve("model_name") or self.model_name,
            api_key=resolve("api_key") or self.api_key,
            base_url=resolve("base_url") or self.base_url,
            extra_body=resolve("extra_body"),
            model_provider=resolve("model_provider"),
            input_price=resolve("input_price") or self.input_price,
            output_price=resolve("output_price") or self.output_price,
        )

    def to_dict(self) -> Dict[str, Any]:
        """Convert config to dictionary."""
        import dataclasses

        return dataclasses.asdict(self)
|
hyw_core/core.py
ADDED
|
@@ -0,0 +1,462 @@
|
|
|
1
|
+
"""
|
|
2
|
+
hyw_core.core - Main HywCore Class
|
|
3
|
+
|
|
4
|
+
Provides the unified LLM query interface and search capabilities.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import asyncio
|
|
8
|
+
import time
|
|
9
|
+
from dataclasses import dataclass, field
|
|
10
|
+
from typing import Dict, List, Any, Optional, Callable, Awaitable
|
|
11
|
+
|
|
12
|
+
from loguru import logger
|
|
13
|
+
|
|
14
|
+
from .config import HywCoreConfig, ModelConfig
|
|
15
|
+
from .pipeline import ModularPipeline
|
|
16
|
+
from .agent import AgentPipeline
|
|
17
|
+
from .search import SearchService
|
|
18
|
+
from .stages.base import StageContext
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass
class QueryRequest:
    """Input bundle handed to the query interface."""

    # The user's text input (the only required field).
    user_input: str

    # base64 encoded images
    images: List[str] = field(default_factory=list)

    # Prior conversation entries, one dict per entry.
    conversation_history: List[Dict] = field(default_factory=list)

    # Override model
    model_name: Optional[str] = None

    # Optional async callback taking a message string; when set it takes
    # precedence over the callback HywCore was constructed with.
    send_notification: Optional[Callable[[str], Awaitable[None]]] = None
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class QueryResponse:
    """Result bundle produced by the query interface."""

    # Required: overall outcome flag and the Markdown response.
    success: bool
    content: str

    # Path to rendered image
    image_path: Optional[str] = None

    # Statistics
    usage: Dict[str, int] = field(default_factory=dict)
    cost: float = 0.0
    total_time: float = 0.0

    # References
    references: List[Dict[str, Any]] = field(default_factory=list)
    page_references: List[Dict[str, Any]] = field(default_factory=list)
    image_references: List[Dict[str, Any]] = field(default_factory=list)

    # Trace information
    stages_trace: Dict[str, Any] = field(default_factory=dict)

    # Error handling
    error: Optional[str] = None
    should_refuse: bool = False
    refuse_reason: str = ""

    # Debug/Save
    web_results: List[Dict[str, Any]] = field(default_factory=list)
    stages_used: List[Dict[str, Any]] = field(default_factory=list)
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
class HywCore:
    """
    HYW Core Service.

    Provides the unified LLM query interface (/q command) and search capabilities.

    Usage:
        from hyw_core import HywCore, HywCoreConfig, QueryRequest

        config = HywCoreConfig.from_yaml("config.yaml")
        core = HywCore(config)

        response = await core.query(QueryRequest(
            user_input="What is Python?",
            images=[],
            conversation_history=[]
        ))
    """

    def __init__(
        self,
        config: HywCoreConfig,
        send_func: Optional[Callable[[str], Awaitable[None]]] = None
    ):
        """
        Initialize HywCore.

        Args:
            config: HywCoreConfig instance
            send_func: Optional callback for sending notifications
        """
        self.config = config
        self._send_func = send_func

        # Create search service
        self._search_service = SearchService(config)

        # Create pipeline (for non-agent mode)
        self._pipeline = ModularPipeline(
            config=config,
            search_service=self._search_service,
            send_func=send_func
        )

        # Agent pipeline (lazy init)
        self._agent_pipeline = None

        # Create renderer (lazy init)
        self._renderer = None

        logger.info("HywCore initialized")

    async def _ensure_renderer(self):
        """Lazy initialize renderer.

        If starting the renderer fails, the cached instance is discarded
        so the next call retries instead of reusing a renderer that never
        started.
        """
        if self._renderer is not None:
            return

        from .browser_control import ContentRenderer

        renderer = ContentRenderer(headless=self.config.headless)
        self._renderer = renderer
        try:
            await renderer.start()
        except BaseException:
            # BaseException so that task cancellation during start-up is
            # covered too; the error is always re-raised.
            if self._renderer is renderer:
                self._renderer = None
            raise

    @staticmethod
    def _cited_references(
        content: str,
        web_results: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """Build the reference list for citations that appear in *content*.

        Citations are ``[number]`` markers; numbers are 1-based indexes
        into the web results that are not flagged ``_hidden``. Numbers
        outside that range are ignored. The result is ordered by
        ascending citation number.
        """
        import re

        # Build visible results list (excluding hidden items)
        visible_results = [r for r in web_results if not r.get("_hidden")]

        # Parse markdown to find which citations are used (pattern: [number])
        cited_ids = {
            int(match.group(1))
            for match in re.finditer(r'\[(\d+)\]', content)
        }

        references = []
        for idx in sorted(cited_ids):
            # idx is 1-based in markdown
            if 1 <= idx <= len(visible_results):
                r = visible_results[idx - 1]
                references.append({
                    "title": r.get("title", ""),
                    "url": r.get("url", ""),
                    "snippet": r.get("content", "")[:300] if r.get("content") else "",
                    "images": r.get("images", []),
                    "is_fetched": r.get("_type") == "page",
                    "raw_screenshot_b64": r.get("screenshot_b64"),
                })
        return references

    async def query(
        self,
        request: QueryRequest,
        output_path: Optional[str] = None
    ) -> QueryResponse:
        """
        Unified query interface.

        This is the main entry point for /q commands.

        Args:
            request: QueryRequest with user input, images, history
            output_path: Optional path to save rendered image

        Returns:
            QueryResponse with content, rendered image path, and metadata.
            Exceptions are caught and reported as a response with
            ``success=False`` and ``error`` set.
        """
        start_time = time.time()

        try:
            # Override model if specified
            model_name = request.model_name or self.config.model_name

            # Use notification callback from request if provided. The
            # pipeline is shared between calls, so always assign: otherwise
            # a callback from an earlier request would stay installed for a
            # later request that has none.
            send_func = request.send_notification or self._send_func
            self._pipeline._send_func = send_func

            # Execute pipeline
            result = await self._pipeline.execute(
                user_input=request.user_input,
                conversation_history=request.conversation_history,
                model_name=model_name,
                images=request.images if request.images else None
            )

            total_time = time.time() - start_time

            # Check for refusal
            if result.get("should_refuse"):
                return QueryResponse(
                    success=True,
                    content="",
                    should_refuse=True,
                    refuse_reason=result.get("refuse_reason", ""),
                    total_time=total_time
                )

            # Extract response data; "or {}" also covers keys that are
            # present but set to None.
            content = result.get("llm_response", "")
            structured = result.get("structured_response") or {}
            billing = result.get("billing_info") or {}

            usage = {
                "input_tokens": billing.get("input_tokens", 0),
                "output_tokens": billing.get("output_tokens", 0)
            }

            # Calculate cost (prices are applied per 1,000,000 tokens)
            model_cfg = self.config.get_model_config("main")
            cost = (
                usage["input_tokens"] * (model_cfg.input_price or 0) / 1_000_000 +
                usage["output_tokens"] * (model_cfg.output_price or 0) / 1_000_000
            )

            # Build response
            response = QueryResponse(
                success=True,
                content=content,
                usage=usage,
                cost=cost,
                total_time=total_time,
                references=structured.get("references", []),
                page_references=structured.get("page_references", []),
                image_references=structured.get("image_references", []),
                stages_trace=result.get("trace", {}),
                web_results=result.get("web_results", []),
                stages_used=result.get("stages_used", [])
            )

            # Render image if output path provided
            if output_path and content:
                await self._ensure_renderer()

                # NOTE(review): the renderer gets references from the
                # top-level result, while the response above takes them
                # from "structured_response" -- confirm this is intended.
                render_success = await self._renderer.render(
                    markdown_content=content,
                    output_path=output_path,
                    stats=result.get("stats", {}),
                    references=result.get("references", []),
                    page_references=result.get("page_references", []),
                    image_references=result.get("image_references", []),
                    stages_used=result.get("stages_used", []),
                    theme_color=self.config.theme_color
                )

                if render_success:
                    response.image_path = output_path

            return response

        except Exception as e:
            logger.error(f"HywCore query failed: {e}")
            logger.exception("Query error details:")
            return QueryResponse(
                success=False,
                content="",
                error=str(e),
                total_time=time.time() - start_time
            )

    async def query_agent(
        self,
        request: QueryRequest,
        output_path: Optional[str] = None
    ) -> QueryResponse:
        """
        Agent-mode query with tool-calling capability.

        Runs the request through AgentPipeline. According to the original
        author's note, the agent can autonomously call web_tool up to 2
        times, and each tool call triggers an IM notification via the
        send_notification callback (behaviour lives in AgentPipeline).

        Args:
            request: QueryRequest with user input, images, history
            output_path: Optional path to save rendered image

        Returns:
            QueryResponse with content, rendered image path, and metadata.
            Exceptions are caught and reported as a response with
            ``success=False`` and ``error`` set.
        """
        start_time = time.time()

        try:
            # Get or create agent pipeline with current send_func
            send_func = request.send_notification or self._send_func

            if self._agent_pipeline is None or self._agent_pipeline.send_func != send_func:
                self._agent_pipeline = AgentPipeline(
                    config=self.config,
                    search_service=self._search_service,
                    send_func=send_func
                )

            # Execute agent pipeline
            result = await self._agent_pipeline.execute(
                user_input=request.user_input,
                conversation_history=request.conversation_history,
                images=request.images if request.images else None,
                model_name=request.model_name
            )

            total_time = time.time() - start_time

            # Check for refusal
            if result.get("refuse_answer"):
                return QueryResponse(
                    success=True,
                    content="",
                    should_refuse=True,
                    refuse_reason=result.get("refuse_reason", ""),
                    total_time=total_time
                )

            # Check for error
            if not result.get("success", True):
                return QueryResponse(
                    success=False,
                    content="",
                    error=result.get("error", "Unknown error"),
                    total_time=total_time
                )

            # Extract response data
            content = result.get("llm_response", "")
            usage = result.get("usage", {})

            # Convert web_results to references format for frontend.
            # Only references actually cited in the markdown are included,
            # ordered by ascending citation number.
            web_results = result.get("web_results", [])
            references = self._cited_references(content, web_results)

            # Build response
            response = QueryResponse(
                success=True,
                content=content,
                usage=usage,
                total_time=total_time,
                references=references,
                web_results=web_results,
                stages_used=result.get("stages_used", [])
            )

            # Render image if output path provided
            if output_path and content:
                await self._ensure_renderer()

                render_success = await self._renderer.render(
                    markdown_content=content,
                    output_path=output_path,
                    stats=result.get("stats", {}),
                    references=references,
                    page_references=[],
                    stages_used=result.get("stages_used", []),
                    theme_color=self.config.theme_color
                )

                if render_success:
                    response.image_path = output_path

            return response

        except Exception as e:
            logger.error(f"HywCore query_agent failed: {e}")
            logger.exception("Agent query error details:")
            return QueryResponse(
                success=False,
                content="",
                error=str(e),
                total_time=time.time() - start_time
            )

    async def search(
        self,
        queries: List[str],
        engine: Optional[str] = None,
        limit: int = 10
    ) -> List[List[Dict[str, Any]]]:
        """
        Independent search interface.

        For future step-by-step search functionality.

        Args:
            queries: List of search queries
            engine: Optional search engine override (currently ignored)
            limit: Results per query (currently ignored; only ``queries``
                is forwarded to the search service)

        Returns:
            List of search results for each query
        """
        # TODO: Support engine override per-call
        return await self._search_service.search_batch(queries)

    async def screenshot(self, url: str) -> Optional[str]:
        """
        Capture full page screenshot of a URL.
        Returns: base64 string or None
        """
        # Default to full_page=True as requested for /w command
        return await self._search_service.screenshot_url(url, full_page=True)

    async def screenshot_with_content(self, url: str, max_content_length: int = 8000) -> Dict[str, Any]:
        """
        Capture screenshot and extract page content.

        Args:
            url: Page to capture
            max_content_length: Forwarded to the search service as the
                content length limit

        Returns:
            Dict with screenshot_b64, content (truncated), title, url
        """
        return await self._search_service.screenshot_with_content(url, max_content_length=max_content_length)

    async def screenshot_batch(self, urls: List[str]) -> List[Optional[str]]:
        """
        Capture full page screenshots of multiple URLs concurrently.
        Returns: list of base64 strings (None for failed ones)
        """
        return await self._search_service.screenshot_urls_batch(urls, full_page=True)

    async def fetch_pages(
        self,
        urls: List[str],
        include_screenshot: bool = False
    ) -> List[Dict[str, Any]]:
        """
        Fetch multiple pages.

        Args:
            urls: List of URLs to fetch
            include_screenshot: Whether to capture screenshots

        Returns:
            List of page data dicts
        """
        return await self._search_service.fetch_pages_batch(
            urls,
            include_screenshot=include_screenshot
        )

    async def render(
        self,
        markdown_content: str,
        output_path: str,
        **kwargs
    ) -> bool:
        """
        Render markdown to image.

        Args:
            markdown_content: Markdown to render
            output_path: Path to save image
            **kwargs: Additional render options; ``theme_color`` defaults
                to the configured theme colour when not supplied

        Returns:
            True if successful
        """
        await self._ensure_renderer()
        return await self._renderer.render(
            markdown_content=markdown_content,
            output_path=output_path,
            theme_color=kwargs.pop("theme_color", self.config.theme_color),
            **kwargs
        )

    async def close(self):
        """Close all resources.

        The pipeline is closed even when closing the renderer raises, and
        the cached renderer is dropped so a later render starts a new one.
        """
        try:
            if self._renderer:
                renderer, self._renderer = self._renderer, None
                await renderer.close()
        finally:
            await self._pipeline.close()
        logger.info("HywCore closed")
|
|
hyw_core/crawling/__init__.py
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""
|
|
2
|
+
hyw_core.crawling - Intelligent Web Crawling Module
|
|
3
|
+
|
|
4
|
+
Provides Crawl4AI-inspired adaptive crawling with:
|
|
5
|
+
- Page completeness guarantees (image loading verification)
|
|
6
|
+
- Content quality scoring
|
|
7
|
+
- Adaptive stop logic
|
|
8
|
+
"""
|
|
9
|
+
|
|
10
|
+
from .models import CrawlConfig, PageResult, CompletenessResult
|
|
11
|
+
from .completeness import CompletenessChecker
|
|
12
|
+
|
|
13
|
+
__all__ = [
|
|
14
|
+
"CrawlConfig",
|
|
15
|
+
"PageResult",
|
|
16
|
+
"CompletenessResult",
|
|
17
|
+
"CompletenessChecker",
|
|
18
|
+
]
|