entari-plugin-hyw 4.0.0rc17__py3-none-any.whl → 4.0.0rc19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of entari-plugin-hyw might be problematic. Click here for more details.

Files changed (55)
  1. entari_plugin_hyw-4.0.0rc19.dist-info/METADATA +26 -0
  2. entari_plugin_hyw-4.0.0rc19.dist-info/RECORD +4 -0
  3. entari_plugin_hyw-4.0.0rc19.dist-info/top_level.txt +1 -0
  4. entari_plugin_hyw/__init__.py +0 -914
  5. entari_plugin_hyw/filters.py +0 -83
  6. entari_plugin_hyw/history.py +0 -251
  7. entari_plugin_hyw/misc.py +0 -214
  8. entari_plugin_hyw/search_cache.py +0 -253
  9. entari_plugin_hyw-4.0.0rc17.dist-info/METADATA +0 -119
  10. entari_plugin_hyw-4.0.0rc17.dist-info/RECORD +0 -52
  11. entari_plugin_hyw-4.0.0rc17.dist-info/top_level.txt +0 -2
  12. hyw_core/__init__.py +0 -94
  13. hyw_core/agent.py +0 -876
  14. hyw_core/browser_control/__init__.py +0 -63
  15. hyw_core/browser_control/assets/card-dist/index.html +0 -429
  16. hyw_core/browser_control/assets/card-dist/logos/anthropic.svg +0 -1
  17. hyw_core/browser_control/assets/card-dist/logos/cerebras.svg +0 -9
  18. hyw_core/browser_control/assets/card-dist/logos/deepseek.png +0 -0
  19. hyw_core/browser_control/assets/card-dist/logos/gemini.svg +0 -1
  20. hyw_core/browser_control/assets/card-dist/logos/google.svg +0 -1
  21. hyw_core/browser_control/assets/card-dist/logos/grok.png +0 -0
  22. hyw_core/browser_control/assets/card-dist/logos/huggingface.png +0 -0
  23. hyw_core/browser_control/assets/card-dist/logos/microsoft.svg +0 -15
  24. hyw_core/browser_control/assets/card-dist/logos/minimax.png +0 -0
  25. hyw_core/browser_control/assets/card-dist/logos/mistral.png +0 -0
  26. hyw_core/browser_control/assets/card-dist/logos/nvida.png +0 -0
  27. hyw_core/browser_control/assets/card-dist/logos/openai.svg +0 -1
  28. hyw_core/browser_control/assets/card-dist/logos/openrouter.png +0 -0
  29. hyw_core/browser_control/assets/card-dist/logos/perplexity.svg +0 -24
  30. hyw_core/browser_control/assets/card-dist/logos/qwen.png +0 -0
  31. hyw_core/browser_control/assets/card-dist/logos/xai.png +0 -0
  32. hyw_core/browser_control/assets/card-dist/logos/xiaomi.png +0 -0
  33. hyw_core/browser_control/assets/card-dist/logos/zai.png +0 -0
  34. hyw_core/browser_control/assets/card-dist/vite.svg +0 -1
  35. hyw_core/browser_control/engines/__init__.py +0 -15
  36. hyw_core/browser_control/engines/base.py +0 -13
  37. hyw_core/browser_control/engines/default.py +0 -166
  38. hyw_core/browser_control/engines/duckduckgo.py +0 -171
  39. hyw_core/browser_control/landing.html +0 -172
  40. hyw_core/browser_control/manager.py +0 -173
  41. hyw_core/browser_control/renderer.py +0 -446
  42. hyw_core/browser_control/service.py +0 -1002
  43. hyw_core/config.py +0 -154
  44. hyw_core/core.py +0 -454
  45. hyw_core/crawling/__init__.py +0 -18
  46. hyw_core/crawling/completeness.py +0 -437
  47. hyw_core/crawling/models.py +0 -88
  48. hyw_core/definitions.py +0 -166
  49. hyw_core/image_cache.py +0 -274
  50. hyw_core/pipeline.py +0 -502
  51. hyw_core/search.py +0 -169
  52. hyw_core/stages/__init__.py +0 -21
  53. hyw_core/stages/base.py +0 -95
  54. hyw_core/stages/summary.py +0 -218
  55. {entari_plugin_hyw-4.0.0rc17.dist-info → entari_plugin_hyw-4.0.0rc19.dist-info}/WHEEL +0 -0
hyw_core/config.py DELETED
@@ -1,154 +0,0 @@
1
- """
2
- hyw_core.config - Configuration Management
3
-
4
- Provides standalone configuration for hyw-core with optional passthrough from parent packages.
5
- """
6
-
7
- from dataclasses import dataclass, field
8
- from typing import Dict, List, Any, Optional
9
-
10
-
11
@dataclass
class ModelConfig:
    """Model settings bundle built by ``HywCoreConfig.get_model_config``.

    Every field is optional; ``None`` means "not set". Field order is part of
    the positional constructor signature and must not change.
    """

    # Identity and endpoint of the model.
    model_name: Optional[str] = None
    api_key: Optional[str] = None
    base_url: Optional[str] = None

    # Extra request payload; presumably forwarded to the LLM API — TODO confirm.
    extra_body: Optional[Dict[str, Any]] = None
    model_provider: Optional[str] = None

    # Pricing used for cost estimation (HywCore.query divides by 1_000_000,
    # so these look like per-million-token prices).
    input_price: Optional[float] = None
    output_price: Optional[float] = None

    # NOTE(review): presumably whether the model accepts image input — confirm.
    image_input: bool = True
22
-
23
-
24
@dataclass
class HywCoreConfig:
    """
    Core configuration for hyw-core.

    Can be used standalone (loaded from YAML) or filled in by a parent
    package that passes its own settings through ``from_dict``.

    Usage:
        # Standalone from YAML
        config = HywCoreConfig.from_yaml("config.yaml")

        # Passthrough from parent
        config = HywCoreConfig.from_dict({
            "model_name": parent_config.model_name,
            "api_key": parent_config.api_key,
            ...
        })
    """

    # LLM Configuration
    models: List[Dict[str, Any]] = field(default_factory=list)
    model_name: str = ""
    api_key: str = ""
    base_url: str = ""
    temperature: float = 0.4

    # Stage-specific model overrides (None -> fall back, see get_model_config)
    instruct_model: Optional[str] = None
    instruct_api_key: Optional[str] = None
    instruct_base_url: Optional[str] = None
    instruct_extra_body: Optional[Dict[str, Any]] = None

    summary_model: Optional[str] = None
    summary_api_key: Optional[str] = None
    summary_base_url: Optional[str] = None
    summary_extra_body: Optional[Dict[str, Any]] = None

    # Search Configuration
    search_engine: str = "duckduckgo"
    search_limit: int = 10
    blocked_domains: List[str] = field(default_factory=list)

    # Browser Configuration
    headless: bool = True
    fetch_timeout: float = 20.0

    # Output Configuration
    language: str = "Simplified Chinese"
    theme_color: str = "#ef4444"

    # Pricing (for cost estimation)
    input_price: float = 0.0
    output_price: float = 0.0

    @classmethod
    def from_dict(cls, data: Optional[Dict[str, Any]]) -> "HywCoreConfig":
        """
        Create a config from a dictionary.

        Used for passthrough from parent packages. Keys that are not
        dataclass fields of this class are dropped, so callers may pass a
        superset of settings.

        Args:
            data: Mapping of field names to values. ``None`` is treated as
                an empty mapping and yields an all-defaults config.

        Returns:
            A new HywCoreConfig.

        Raises:
            TypeError: If ``data`` is neither ``None`` nor an object with a
                callable ``items()`` (e.g. a YAML document whose top level
                is a list or a scalar).
        """
        import dataclasses

        if data is None:
            return cls()

        # Duck-typed check: anything exposing items() was accepted before,
        # so keep accepting it; only turn the opaque AttributeError into a
        # descriptive TypeError.
        if not callable(getattr(data, "items", None)):
            raise TypeError(
                f"HywCoreConfig.from_dict expects a mapping, got {type(data).__name__}"
            )

        known = {f.name for f in dataclasses.fields(cls)}
        return cls(**{key: value for key, value in data.items() if key in known})

    @classmethod
    def from_yaml(cls, path: str) -> "HywCoreConfig":
        """
        Load a config from a YAML file.

        Used for standalone usage. An empty document yields an all-defaults
        config.

        Args:
            path: Path of the YAML file, read as UTF-8.

        Returns:
            A new HywCoreConfig.

        Raises:
            TypeError: If the top level of the document is not a mapping
                (raised by ``from_dict``).
        """
        import yaml

        with open(path, 'r', encoding='utf-8') as f:
            data = yaml.safe_load(f) or {}
        return cls.from_dict(data)

    def get_model_config(self, stage: str) -> "ModelConfig":
        """
        Get the resolved model config for a stage.

        Each setting is looked up on the stage-specific attributes first and
        falls back to the un-prefixed root attribute:

        - ``"instruct"``: ``instruct_*`` -> root
        - ``"qa"``: ``qa_*`` -> ``instruct_*`` -> root
        - anything else (``"main"`` / summary): ``summary_*`` -> root

        A falsy stage value (``None``, ``""``, ``0``) counts as "not set".

        Args:
            stage: "instruct", "qa", or "main" (summary)

        Returns:
            ModelConfig with resolved settings. ``image_input`` is left at
            its ModelConfig default.
        """
        if stage == "instruct":
            prefixes = ("instruct_",)
        elif stage == "qa":
            prefixes = ("qa_", "instruct_")
        else:  # "main" / summary
            prefixes = ("summary_",)

        def resolve(field_name: str):
            """Return the first truthy prefixed value, else the root attribute."""
            for prefix in prefixes:
                # getattr with a default: not every prefix/field combination
                # exists as a dataclass field (e.g. there are no qa_* fields).
                value = getattr(self, f"{prefix}{field_name}", None)
                if value:
                    return value
            return getattr(self, field_name, None)

        return ModelConfig(
            # Stage overrides are named "<stage>_model"; the root field is
            # "model_name", hence the two-step lookup.
            model_name=resolve("model") or resolve("model_name"),
            api_key=resolve("api_key"),
            base_url=resolve("base_url"),
            extra_body=resolve("extra_body"),
            model_provider=resolve("model_provider"),
            input_price=resolve("input_price"),
            output_price=resolve("output_price"),
        )

    def to_dict(self) -> Dict[str, Any]:
        """Convert the config to a plain dictionary via ``dataclasses.asdict``."""
        import dataclasses

        return dataclasses.asdict(self)
hyw_core/core.py DELETED
@@ -1,454 +0,0 @@
1
- """
2
- hyw_core.core - Main HywCore Class
3
-
4
- Provides the unified LLM query interface and search capabilities.
5
- """
6
-
7
- import asyncio
8
- import time
9
- from dataclasses import dataclass, field
10
- from typing import Dict, List, Any, Optional, Callable, Awaitable
11
-
12
- from loguru import logger
13
-
14
- from .config import HywCoreConfig, ModelConfig
15
- from .pipeline import ModularPipeline
16
- from .agent import AgentPipeline
17
- from .search import SearchService
18
- from .stages.base import StageContext
19
-
20
-
21
@dataclass
class QueryRequest:
    """Input bundle for ``HywCore.query`` / ``HywCore.query_agent``."""

    # The user's message; the only required field.
    user_input: str

    # base64 encoded images
    images: List[str] = field(default_factory=list)

    # Earlier turns of the conversation, handed to the pipeline unchanged.
    conversation_history: List[Dict] = field(default_factory=list)

    # Override model
    model_name: Optional[str] = None

    # Optional callbacks: when set, this notification sender takes
    # precedence over the one given to HywCore at construction time.
    send_notification: Optional[Callable[[str], Awaitable[None]]] = None
31
-
32
-
33
@dataclass
class QueryResponse:
    """Result bundle returned by ``HywCore.query`` / ``HywCore.query_agent``."""

    # False when the call failed; see `error`.
    success: bool
    # Markdown response
    content: str
    # Path to rendered image; set only after a successful render.
    image_path: Optional[str] = None

    # Statistics
    # HywCore.query fills usage with "input_tokens" / "output_tokens".
    usage: Dict[str, int] = field(default_factory=dict)
    cost: float = 0.0
    # Wall-clock seconds, measured with time.time().
    total_time: float = 0.0

    # References
    references: List[Dict[str, Any]] = field(default_factory=list)
    page_references: List[Dict[str, Any]] = field(default_factory=list)
    image_references: List[Dict[str, Any]] = field(default_factory=list)

    # Trace information
    stages_trace: Dict[str, Any] = field(default_factory=dict)

    # Error handling
    error: Optional[str] = None
    should_refuse: bool = False
    refuse_reason: str = ""

    # Debug/Save
    web_results: List[Dict[str, Any]] = field(default_factory=list)
    stages_used: List[Dict[str, Any]] = field(default_factory=list)
61
-
62
-
63
-
64
class HywCore:
    """
    HYW Core Service.

    Provides the unified LLM query interface (/q command) and search capabilities.

    Usage:
        from hyw_core import HywCore, HywCoreConfig, QueryRequest

        config = HywCoreConfig.from_yaml("config.yaml")
        core = HywCore(config)

        response = await core.query(QueryRequest(
            user_input="What is Python?",
            images=[],
            conversation_history=[]
        ))
    """

    def __init__(
        self,
        config: HywCoreConfig,
        send_func: Optional[Callable[[str], Awaitable[None]]] = None
    ):
        """
        Initialize HywCore.

        Builds the search service and the non-agent pipeline eagerly; the
        agent pipeline and the renderer are created on first use.

        Args:
            config: HywCoreConfig instance
            send_func: Optional callback for sending notifications
        """
        self.config = config
        self._send_func = send_func

        # Create search service
        self._search_service = SearchService(config)

        # Create pipeline (for non-agent mode)
        self._pipeline = ModularPipeline(
            config=config,
            search_service=self._search_service,
            send_func=send_func
        )

        # Agent pipeline (lazy init)
        self._agent_pipeline = None

        # Create renderer (lazy init)
        self._renderer = None

        logger.info("HywCore initialized")

    async def _ensure_renderer(self):
        """Create and start the renderer on first use.

        If ``start()`` fails, the cached renderer is discarded again so the
        next call retries instead of reusing a renderer that never started.
        """
        if self._renderer is None:
            from .browser_control import ContentRenderer
            self._renderer = ContentRenderer(headless=self.config.headless)
            try:
                await self._renderer.start()
            except BaseException:
                # Covers cancellation as well: never keep an unstarted renderer.
                self._renderer = None
                raise

    async def _render_into(
        self,
        response: QueryResponse,
        output_path: str,
        **render_kwargs
    ) -> None:
        """Render to ``output_path`` and record the path on ``response`` on success.

        ``render_kwargs`` are forwarded to the renderer together with the
        configured theme color.
        """
        await self._ensure_renderer()

        render_success = await self._renderer.render(
            output_path=output_path,
            theme_color=self.config.theme_color,
            **render_kwargs
        )
        if render_success:
            response.image_path = output_path

    async def query(
        self,
        request: QueryRequest,
        output_path: Optional[str] = None
    ) -> QueryResponse:
        """
        Unified query interface.

        This is the main entry point for /q commands.

        Exceptions derived from ``Exception`` are logged and turned into a
        ``success=False`` response rather than propagated.

        Args:
            request: QueryRequest with user input, images, history
            output_path: Optional path to save rendered image

        Returns:
            QueryResponse with content, rendered image path, and metadata
        """
        start_time = time.time()

        try:
            # Override model if specified
            model_name = request.model_name or self.config.model_name

            # Use notification callback from request if provided
            send_func = request.send_notification or self._send_func
            if send_func and self._pipeline._send_func != send_func:
                self._pipeline._send_func = send_func

            # Execute pipeline
            result = await self._pipeline.execute(
                user_input=request.user_input,
                conversation_history=request.conversation_history,
                model_name=model_name,
                images=request.images if request.images else None
            )

            total_time = time.time() - start_time

            # Check for refusal
            if result.get("should_refuse"):
                return QueryResponse(
                    success=True,
                    content="",
                    should_refuse=True,
                    refuse_reason=result.get("refuse_reason", ""),
                    total_time=total_time
                )

            # Extract response data; an explicit None is treated like a
            # missing key so it cannot break the .get() calls below.
            content = result.get("llm_response", "")
            structured = result.get("structured_response") or {}
            billing = result.get("billing_info") or {}

            usage = {
                "input_tokens": billing.get("input_tokens", 0),
                "output_tokens": billing.get("output_tokens", 0)
            }

            # Calculate cost; the division by 1_000_000 implies the prices
            # are per million tokens.
            model_cfg = self.config.get_model_config("main")
            cost = (
                usage["input_tokens"] * (model_cfg.input_price or 0) / 1_000_000 +
                usage["output_tokens"] * (model_cfg.output_price or 0) / 1_000_000
            )

            # Build response
            response = QueryResponse(
                success=True,
                content=content,
                usage=usage,
                cost=cost,
                total_time=total_time,
                references=structured.get("references", []),
                page_references=structured.get("page_references", []),
                image_references=structured.get("image_references", []),
                stages_trace=result.get("trace", {}),
                web_results=result.get("web_results", []),
                stages_used=result.get("stages_used", [])
            )

            # Render image if output path provided
            if output_path and content:
                # NOTE(review): the renderer receives the reference lists from
                # the top level of `result`, while the response above takes
                # them from `structured_response` — confirm which is intended.
                await self._render_into(
                    response,
                    output_path,
                    markdown_content=content,
                    stats=result.get("stats", {}),
                    references=result.get("references", []),
                    page_references=result.get("page_references", []),
                    image_references=result.get("image_references", []),
                    stages_used=result.get("stages_used", [])
                )

            return response

        except Exception as e:
            logger.error(f"HywCore query failed: {e}")
            logger.exception("Query error details:")
            return QueryResponse(
                success=False,
                content="",
                error=str(e),
                total_time=time.time() - start_time
            )

    async def query_agent(
        self,
        request: QueryRequest,
        output_path: Optional[str] = None
    ) -> QueryResponse:
        """
        Agent-mode query with tool-calling capability.

        Uses AgentPipeline which can autonomously call web_tool up to 2 times.
        Each tool call triggers an IM notification via send_notification callback.

        Exceptions derived from ``Exception`` are logged and turned into a
        ``success=False`` response rather than propagated. No cost is
        computed in this mode.

        Args:
            request: QueryRequest with user input, images, history
            output_path: Optional path to save rendered image

        Returns:
            QueryResponse with content, rendered image path, and metadata
        """
        start_time = time.time()

        try:
            # Get or create agent pipeline with current send_func
            send_func = request.send_notification or self._send_func

            if self._agent_pipeline is None or self._agent_pipeline.send_func != send_func:
                self._agent_pipeline = AgentPipeline(
                    config=self.config,
                    search_service=self._search_service,
                    send_func=send_func
                )

            # Execute agent pipeline
            result = await self._agent_pipeline.execute(
                user_input=request.user_input,
                conversation_history=request.conversation_history,
                images=request.images if request.images else None,
                model_name=request.model_name
            )

            total_time = time.time() - start_time

            # Check for refusal
            if result.get("refuse_answer"):
                return QueryResponse(
                    success=True,
                    content="",
                    should_refuse=True,
                    refuse_reason=result.get("refuse_reason", ""),
                    total_time=total_time
                )

            # Check for error
            if not result.get("success", True):
                return QueryResponse(
                    success=False,
                    content="",
                    error=result.get("error", "Unknown error"),
                    total_time=total_time
                )

            # Extract response data
            content = result.get("llm_response", "")
            usage = result.get("usage") or {}
            web_results = result.get("web_results", [])

            # Convert web_results to the references format for the frontend.
            # ALL visible (non-hidden) results are passed, in order, so that
            # [N] citations map to the N-th item; App.vue handles reordering
            # used vs unused citations.
            references = [
                {
                    "title": r.get("title", ""),
                    "url": r.get("url", ""),
                    "snippet": (r.get("content") or "")[:300],
                    "images": r.get("images", []),
                    "is_fetched": r.get("_type") == "page",
                    "raw_screenshot_b64": r.get("screenshot_b64"),
                }
                for r in web_results
                if not r.get("_hidden")
            ]

            # Build response
            response = QueryResponse(
                success=True,
                content=content,
                usage=usage,
                total_time=total_time,
                references=references,
                web_results=web_results,
                stages_used=result.get("stages_used", [])
            )

            # Render image if output path provided
            if output_path and content:
                await self._render_into(
                    response,
                    output_path,
                    markdown_content=content,
                    stats=result.get("stats", {}),
                    references=references,
                    page_references=[],
                    stages_used=result.get("stages_used", [])
                )

            return response

        except Exception as e:
            logger.error(f"HywCore query_agent failed: {e}")
            logger.exception("Agent query error details:")
            return QueryResponse(
                success=False,
                content="",
                error=str(e),
                total_time=time.time() - start_time
            )

    async def search(
        self,
        queries: List[str],
        engine: Optional[str] = None,
        limit: int = 10
    ) -> List[List[Dict[str, Any]]]:
        """
        Independent search interface.

        For future step-by-step search functionality.

        Args:
            queries: List of search queries
            engine: Optional search engine override (currently ignored)
            limit: Results per query (currently ignored)

        Returns:
            List of search results for each query
        """
        # TODO: Support engine override per-call
        # NOTE(review): `limit` is not forwarded either; search_batch only
        # receives the queries.
        return await self._search_service.search_batch(queries)

    async def screenshot(self, url: str) -> Optional[str]:
        """
        Capture full page screenshot of a URL.
        Returns: base64 string or None
        """
        # Default to full_page=True as requested for /w command
        return await self._search_service.screenshot_url(url, full_page=True)

    async def screenshot_with_content(self, url: str, max_content_length: int = 8000) -> Dict[str, Any]:
        """
        Capture screenshot and extract page content.

        Returns:
            Dict with screenshot_b64, content (truncated), title, url
        """
        return await self._search_service.screenshot_with_content(url, max_content_length=max_content_length)

    async def screenshot_batch(self, urls: List[str]) -> List[Optional[str]]:
        """
        Capture full page screenshots of multiple URLs concurrently.
        Returns: list of base64 strings (None for failed ones)
        """
        return await self._search_service.screenshot_urls_batch(urls, full_page=True)

    async def fetch_pages(
        self,
        urls: List[str],
        include_screenshot: bool = False
    ) -> List[Dict[str, Any]]:
        """
        Fetch multiple pages.

        Args:
            urls: List of URLs to fetch
            include_screenshot: Whether to capture screenshots

        Returns:
            List of page data dicts
        """
        return await self._search_service.fetch_pages_batch(
            urls,
            include_screenshot=include_screenshot
        )

    async def render(
        self,
        markdown_content: str,
        output_path: str,
        **kwargs
    ) -> bool:
        """
        Render markdown to image.

        Args:
            markdown_content: Markdown to render
            output_path: Path to save image
            **kwargs: Additional render options; ``theme_color`` defaults to
                the configured theme color when not given

        Returns:
            True if successful
        """
        await self._ensure_renderer()
        return await self._renderer.render(
            markdown_content=markdown_content,
            output_path=output_path,
            theme_color=kwargs.pop("theme_color", self.config.theme_color),
            **kwargs
        )

    async def close(self):
        """Close all resources.

        The pipeline is closed even if closing the renderer raises, and the
        cached renderer is dropped so a later render starts a fresh one.
        """
        # NOTE(review): the agent pipeline and the search service are not
        # closed here — confirm whether they hold resources of their own.
        renderer, self._renderer = self._renderer, None
        try:
            if renderer:
                await renderer.close()
        finally:
            await self._pipeline.close()
        logger.info("HywCore closed")
hyw_core/crawling/__init__.py DELETED
@@ -1,18 +0,0 @@
1
- """
2
- hyw_core.crawling - Intelligent Web Crawling Module
3
-
4
- Provides Crawl4AI-inspired adaptive crawling with:
5
- - Page completeness guarantees (image loading verification)
6
- - Content quality scoring
7
- - Adaptive stop logic
8
- """
9
-
10
- from .models import CrawlConfig, PageResult, CompletenessResult
11
- from .completeness import CompletenessChecker
12
-
13
- __all__ = [
14
- "CrawlConfig",
15
- "PageResult",
16
- "CompletenessResult",
17
- "CompletenessChecker",
18
- ]