isage-middleware 0.2.4.3__cp311-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (94) hide show
  1. isage_middleware-0.2.4.3.dist-info/METADATA +266 -0
  2. isage_middleware-0.2.4.3.dist-info/RECORD +94 -0
  3. isage_middleware-0.2.4.3.dist-info/WHEEL +5 -0
  4. isage_middleware-0.2.4.3.dist-info/top_level.txt +1 -0
  5. sage/middleware/__init__.py +59 -0
  6. sage/middleware/_version.py +6 -0
  7. sage/middleware/components/__init__.py +30 -0
  8. sage/middleware/components/extensions_compat.py +141 -0
  9. sage/middleware/components/sage_db/__init__.py +116 -0
  10. sage/middleware/components/sage_db/backend.py +136 -0
  11. sage/middleware/components/sage_db/service.py +15 -0
  12. sage/middleware/components/sage_flow/__init__.py +76 -0
  13. sage/middleware/components/sage_flow/python/__init__.py +14 -0
  14. sage/middleware/components/sage_flow/python/micro_service/__init__.py +4 -0
  15. sage/middleware/components/sage_flow/python/micro_service/sage_flow_service.py +88 -0
  16. sage/middleware/components/sage_flow/python/sage_flow.py +30 -0
  17. sage/middleware/components/sage_flow/service.py +14 -0
  18. sage/middleware/components/sage_mem/__init__.py +83 -0
  19. sage/middleware/components/sage_sias/__init__.py +59 -0
  20. sage/middleware/components/sage_sias/continual_learner.py +184 -0
  21. sage/middleware/components/sage_sias/coreset_selector.py +302 -0
  22. sage/middleware/components/sage_sias/types.py +94 -0
  23. sage/middleware/components/sage_tsdb/__init__.py +81 -0
  24. sage/middleware/components/sage_tsdb/python/__init__.py +21 -0
  25. sage/middleware/components/sage_tsdb/python/_sage_tsdb.pyi +17 -0
  26. sage/middleware/components/sage_tsdb/python/algorithms/__init__.py +17 -0
  27. sage/middleware/components/sage_tsdb/python/algorithms/base.py +51 -0
  28. sage/middleware/components/sage_tsdb/python/algorithms/out_of_order_join.py +248 -0
  29. sage/middleware/components/sage_tsdb/python/algorithms/window_aggregator.py +296 -0
  30. sage/middleware/components/sage_tsdb/python/micro_service/__init__.py +7 -0
  31. sage/middleware/components/sage_tsdb/python/micro_service/sage_tsdb_service.py +365 -0
  32. sage/middleware/components/sage_tsdb/python/sage_tsdb.py +523 -0
  33. sage/middleware/components/sage_tsdb/service.py +17 -0
  34. sage/middleware/components/vector_stores/__init__.py +25 -0
  35. sage/middleware/components/vector_stores/chroma.py +483 -0
  36. sage/middleware/components/vector_stores/chroma_adapter.py +185 -0
  37. sage/middleware/components/vector_stores/milvus.py +677 -0
  38. sage/middleware/operators/__init__.py +56 -0
  39. sage/middleware/operators/agent/__init__.py +24 -0
  40. sage/middleware/operators/agent/planning/__init__.py +5 -0
  41. sage/middleware/operators/agent/planning/llm_adapter.py +41 -0
  42. sage/middleware/operators/agent/planning/planner_adapter.py +98 -0
  43. sage/middleware/operators/agent/planning/router.py +107 -0
  44. sage/middleware/operators/agent/runtime.py +296 -0
  45. sage/middleware/operators/agentic/__init__.py +41 -0
  46. sage/middleware/operators/agentic/config.py +254 -0
  47. sage/middleware/operators/agentic/planning_operator.py +125 -0
  48. sage/middleware/operators/agentic/refined_searcher.py +132 -0
  49. sage/middleware/operators/agentic/runtime.py +241 -0
  50. sage/middleware/operators/agentic/timing_operator.py +125 -0
  51. sage/middleware/operators/agentic/tool_selection_operator.py +127 -0
  52. sage/middleware/operators/context/__init__.py +17 -0
  53. sage/middleware/operators/context/critic_evaluation.py +16 -0
  54. sage/middleware/operators/context/model_context.py +565 -0
  55. sage/middleware/operators/context/quality_label.py +12 -0
  56. sage/middleware/operators/context/search_query_results.py +61 -0
  57. sage/middleware/operators/context/search_result.py +42 -0
  58. sage/middleware/operators/context/search_session.py +79 -0
  59. sage/middleware/operators/filters/__init__.py +26 -0
  60. sage/middleware/operators/filters/context_sink.py +387 -0
  61. sage/middleware/operators/filters/context_source.py +376 -0
  62. sage/middleware/operators/filters/evaluate_filter.py +83 -0
  63. sage/middleware/operators/filters/tool_filter.py +74 -0
  64. sage/middleware/operators/llm/__init__.py +18 -0
  65. sage/middleware/operators/llm/sagellm_generator.py +432 -0
  66. sage/middleware/operators/rag/__init__.py +147 -0
  67. sage/middleware/operators/rag/arxiv.py +331 -0
  68. sage/middleware/operators/rag/chunk.py +13 -0
  69. sage/middleware/operators/rag/document_loaders.py +23 -0
  70. sage/middleware/operators/rag/evaluate.py +658 -0
  71. sage/middleware/operators/rag/generator.py +340 -0
  72. sage/middleware/operators/rag/index_builder/__init__.py +48 -0
  73. sage/middleware/operators/rag/index_builder/builder.py +363 -0
  74. sage/middleware/operators/rag/index_builder/manifest.py +101 -0
  75. sage/middleware/operators/rag/index_builder/storage.py +131 -0
  76. sage/middleware/operators/rag/pipeline.py +46 -0
  77. sage/middleware/operators/rag/profiler.py +59 -0
  78. sage/middleware/operators/rag/promptor.py +400 -0
  79. sage/middleware/operators/rag/refiner.py +231 -0
  80. sage/middleware/operators/rag/reranker.py +364 -0
  81. sage/middleware/operators/rag/retriever.py +1308 -0
  82. sage/middleware/operators/rag/searcher.py +37 -0
  83. sage/middleware/operators/rag/types.py +28 -0
  84. sage/middleware/operators/rag/writer.py +80 -0
  85. sage/middleware/operators/tools/__init__.py +71 -0
  86. sage/middleware/operators/tools/arxiv_paper_searcher.py +175 -0
  87. sage/middleware/operators/tools/arxiv_searcher.py +102 -0
  88. sage/middleware/operators/tools/duckduckgo_searcher.py +105 -0
  89. sage/middleware/operators/tools/image_captioner.py +104 -0
  90. sage/middleware/operators/tools/nature_news_fetcher.py +224 -0
  91. sage/middleware/operators/tools/searcher_tool.py +514 -0
  92. sage/middleware/operators/tools/text_detector.py +185 -0
  93. sage/middleware/operators/tools/url_text_extractor.py +104 -0
  94. sage/middleware/py.typed +2 -0
@@ -0,0 +1,37 @@
1
+ from typing import Any
2
+
3
+ import requests
4
+
5
+ from sage.common.core.functions import MapFunction as MapOperator
6
+
7
+
8
class BochaWebSearch(MapOperator):
    """Map operator that sends a query string to the Bocha web-search API.

    Config keys read from ``config``:
        api_key: Value sent verbatim in the ``Authorization`` header (required).
        count:   ``count`` field of the request payload (default 10).
        page:    ``page`` field of the request payload (default 1).
        summary: ``summary`` field of the request payload (default True).
        timeout: Seconds to wait for the HTTP request (default 30).

    Raises:
        ValueError: If ``config`` carries no ``api_key``.
    """

    def __init__(self, config: dict[str, Any], **kwargs):
        super().__init__(**kwargs)
        self.api_key = config.get("api_key")
        self.count = config.get("count", 10)
        self.page = config.get("page", 1)
        self.summary = config.get("summary", True)
        # Without a timeout ``requests`` waits forever on a stalled connection.
        self.timeout = config.get("timeout", 30)
        self.url = "https://api.bochaai.com/v1/web-search"

        if not self.api_key:
            raise ValueError("BochaWebSearch requires an 'api_key' in config.")

    def execute(self, data: str) -> dict[str, Any]:
        """POST ``data`` as the search query and return the decoded JSON body.

        Any failure (network error, timeout, non-2xx status, invalid JSON) is
        logged and an empty dict is returned instead of raising.
        """
        headers = {"Authorization": self.api_key, "Content-Type": "application/json"}
        payload = {
            "query": data,
            "summary": self.summary,
            "count": self.count,
            "page": self.page,
        }

        try:
            response = requests.post(
                self.url, headers=headers, json=payload, timeout=self.timeout
            )
            response.raise_for_status()
            return response.json()
        except Exception as e:
            # Deliberately best-effort: one failed search must not stop the pipeline.
            self.logger.error(f"BochaWebSearch error: {e}", exc_info=True)
            return {}  # Return empty dict on error
@@ -0,0 +1,28 @@
1
+ """Compatibility shim for RAG type definitions.
2
+
3
+ Import from ``sage.libs.rag.types`` instead of middleware.
4
+ """
5
+
6
+ from sage.libs.rag.types import ( # noqa: F401
7
+ RAGDocument,
8
+ RAGInput,
9
+ RAGOutput,
10
+ RAGQuery,
11
+ RAGResponse,
12
+ create_rag_response,
13
+ ensure_rag_response,
14
+ extract_query,
15
+ extract_results,
16
+ )
17
+
18
+ __all__ = [
19
+ "RAGDocument",
20
+ "RAGQuery",
21
+ "RAGResponse",
22
+ "RAGInput",
23
+ "RAGOutput",
24
+ "ensure_rag_response",
25
+ "extract_query",
26
+ "extract_results",
27
+ "create_rag_response",
28
+ ]
@@ -0,0 +1,80 @@
1
+ from sage.common.core.functions import MapFunction as MapOperator
2
+
3
+
4
class MemoryWriter(MapOperator):
    """Map operator that forwards incoming text to the enabled memory collections.

    For each of the memory kinds ``stm``, ``ltm`` and ``dcm`` the config may
    carry a boolean ``<kind>`` switch, a ``<kind>_collection`` target and a
    ``<kind>_config`` dict. ``execute`` always returns its input unchanged.
    """

    def __init__(self, config: dict, **kwargs):
        super().__init__(config, **kwargs)
        self.state = None
        self.config = config
        # One entry per enabled memory kind.
        self.collections = {}

        for kind in ("stm", "ltm", "dcm"):
            if not self.config.get(kind, False):
                continue
            kind_config = self.config.get(f"{kind}_config", {})
            self.collections[kind] = {
                "collection": self.config.get(f"{kind}_collection"),
                "config": kind_config,
            }
        # TODO: add state management to runtime_context
        # Issue URL: https://github.com/intellistream/SAGE/issues/235

    def execute(self, data: str | list[str] | tuple[str, str]):
        """Store ``data`` in every enabled collection and return ``data`` as-is.

        A list is stored as given, a 2-tuple is concatenated into a single
        string, and a plain string becomes a one-element list. Any other input
        is logged as unsupported and returned untouched.
        """
        # Normalise the input into a list of documents.
        if isinstance(data, list):
            documents = data
        elif isinstance(data, tuple) and len(data) == 2:
            documents = [f"{data[0]}{data[1]}"]  # join the two tuple members
        elif isinstance(data, str):
            documents = [data]
        else:
            self.logger.error(f"Unsupported data type: {type(data)}")
            return data

        # Write to every enabled collection.
        for mem_type, settings in self.collections.items():
            target = settings["collection"]
            target_config = settings["config"]
            if not target:
                self.logger.warning(f"{mem_type.upper()} collection not initialized")
                continue

            try:
                # TODO: the writer function should actively send the data to the
                # memory manager function, which then runs its own `execute`
                # method to read/write the memory. Scheduling may introduce
                # blocking here -- this can be optimised, see MorphStream.
                if self.state is None:
                    self.logger.warning(
                        f"State manager not initialized. Cannot store to {mem_type.upper()}. "
                        "See TODO: https://github.com/intellistream/SAGE/issues/235"
                    )
                else:
                    self.state.store(
                        collection=target,
                        documents=documents,
                        collection_config=target_config,
                    )
                    self.logger.debug(f"Stored {len(documents)} chunks to {mem_type.upper()}")
            except Exception as e:
                self.logger.error(f"Failed to store to {mem_type.upper()}: {str(e)}")

        return data  # hand the original input downstream
@@ -0,0 +1,71 @@
1
+ """
2
+ Tool Operators
3
+
4
+ This module contains domain-specific tool operators:
5
+ - Search tools (web search, document search)
6
+ - Data extraction tools
7
+
8
+ These operators inherit from base operator classes in sage.kernel.operators
9
+ and implement tool-specific business logic.
10
+
11
+ Note: Some tools require heavy dependencies (torch, transformers).
12
+ They are loaded lazily and will raise ImportError if dependencies are missing.
13
+ """
14
+
15
+ import warnings
16
+ from typing import TYPE_CHECKING
17
+
18
+ # Core tools (minimal dependencies)
19
+ from sage.middleware.operators.tools.arxiv_paper_searcher import _Searcher_Tool
20
+ from sage.middleware.operators.tools.arxiv_searcher import ArxivSearcher
21
+ from sage.middleware.operators.tools.nature_news_fetcher import Nature_News_Fetcher_Tool
22
+ from sage.middleware.operators.tools.searcher_tool import BochaSearchTool
23
+ from sage.middleware.operators.tools.url_text_extractor import URL_Text_Extractor_Tool
24
+
25
# Heavy tools (require torch/transformers) - loaded lazily on first access.
#
# NOTE: the public names ``ImageCaptioner`` and ``text_detector`` must NOT be
# bound at module level. A module-level ``__getattr__`` (PEP 562) is only
# consulted when normal attribute lookup fails, so pre-binding them to ``None``
# would short-circuit the lazy loader and callers would always receive ``None``.
_HEAVY_TOOLS_LOADED = False
_HEAVY_TOOL_NAMES = ("ImageCaptioner", "text_detector")

if TYPE_CHECKING:
    # Give type checkers and linters real definitions for the lazy names.
    from sage.middleware.operators.tools.image_captioner import ImageCaptioner
    from sage.middleware.operators.tools.text_detector import text_detector


def _load_heavy_tools():
    """Import the torch/transformers-backed tools and bind them in this module.

    On success the tools are stored as module globals, so later attribute
    lookups resolve directly without going through ``__getattr__``. When the
    optional dependencies are missing a ``UserWarning`` is emitted and nothing
    is bound, so a later access retries the import.
    """
    global _HEAVY_TOOLS_LOADED
    if _HEAVY_TOOLS_LOADED:
        return
    try:
        from sage.middleware.operators.tools.image_captioner import ImageCaptioner as _IC
        from sage.middleware.operators.tools.text_detector import text_detector as _TD
    except ImportError as e:
        warnings.warn(
            f"Heavy tool operators not available: {e}\n"
            "Install with: pip install torch transformers",
            UserWarning,
            stacklevel=2,
        )
        return

    globals()["ImageCaptioner"] = _IC
    globals()["text_detector"] = _TD
    _HEAVY_TOOLS_LOADED = True


def __getattr__(name: str):
    """Lazy load heavy tools on access.

    Returns the requested tool, or ``None`` when its optional dependencies are
    not installed (a warning has been emitted in that case). Any other unknown
    attribute raises ``AttributeError`` as usual.
    """
    if name in _HEAVY_TOOL_NAMES:
        _load_heavy_tools()
        return globals().get(name)
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
61
+
62
+
63
+ __all__ = [
64
+ "BochaSearchTool",
65
+ "_Searcher_Tool",
66
+ "ArxivSearcher",
67
+ "Nature_News_Fetcher_Tool",
68
+ "ImageCaptioner",
69
+ "text_detector",
70
+ "URL_Text_Extractor_Tool",
71
+ ]
@@ -0,0 +1,175 @@
1
+ import logging
2
+ import re
3
+
4
+ import requests
5
+ from bs4 import BeautifulSoup
6
+ from bs4.element import Tag
7
+
8
+ from sage.libs.foundation.tools.tool import BaseTool
9
+
10
+
11
class _Searcher_Tool(BaseTool):
    """Tool that scrapes the arXiv HTML search page for papers matching a query."""

    def __init__(self):
        super().__init__(
            tool_name="_Searcher_Tool",
            tool_description="A tool that searches arXiv for papers based on a given query.",
            input_types={
                "query": "str - The search query for arXiv papers.",
                "size": "int - The number of results per page (25, 50, 100, or 200). If None, use 25.",
                "max_results": "int - The maximum number of papers to return (default: 25). Should be less than or equal to 100.",
            },
            output_type="list - A list of dictionaries containing paper information.",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(query="tool agents with large language models")',
                    "description": "Search for papers about tool agents with large language models.",
                },
                {
                    "command": 'execution = tool.execute(query="quantum computing", size=100, max_results=50)',
                    "description": "Search for quantum computing papers, with 100 results per page, returning a maximum of 50 papers.",
                },
                {
                    "command": 'execution = tool.execute(query="machine learning", max_results=75)',
                    "description": "Search for machine learning papers, returning a maximum of 75 papers.",
                },
            ],
        )
        # Store additional metadata as instance variables
        self.tool_version = "1.0.0"
        self.valid_sizes = [25, 50, 100, 200]
        self.base_url = "https://arxiv.org/search/"

    def build_tool(self):
        """
        No specific build required for this tool.
        """
        pass

    @staticmethod
    def _parse_paper(paper):
        """Extract title, authors, abstract and link from one ``li.arxiv-result`` element."""
        title_elem = paper.find("p", class_="title")  # type: ignore
        title = title_elem.text.strip() if title_elem else "No title found"

        authors_elem = paper.find("p", class_="authors")  # type: ignore
        authors = authors_elem.text.strip() if authors_elem else "No authors found"
        authors = re.sub(r"^Authors:\s*", "", authors)
        authors = re.sub(r"\s+", " ", authors).strip()

        abstract_elem = paper.find("span", class_="abstract-full")  # type: ignore
        abstract = abstract_elem.text.strip() if abstract_elem else "No abstract available"
        # Drop the collapse control text that is part of the abstract span.
        abstract = abstract.replace("△ Less", "").strip()

        link_elem = paper.find("p", class_="list-title")  # type: ignore
        link_tag = link_elem.find("a") if isinstance(link_elem, Tag) else None  # type: ignore
        if isinstance(link_tag, Tag) and link_tag.has_attr("href"):
            link = link_tag["href"]
        else:
            link = "No link found"

        return {
            "title": title,
            "authors": authors,
            "abstract": abstract,
            "link": link,
        }

    def execute(self, query, size=None, max_results=25):
        """
        Executes the arXiv search tool to find papers based on the given query.

        Parameters:
            query (str): The search query for arXiv papers.
            size (int): The number of results per page. ``None`` means 25; a
                value outside ``self.valid_sizes`` is snapped to the nearest
                valid size.
            max_results (int): The maximum number of papers to return. ``None``
                means 25; values above 100 are capped at 100.

        Returns:
            list: A list of dictionaries containing paper information. On a
            request or parsing error the papers collected so far are returned.
        """
        if size is None:
            size = 25
        elif size not in self.valid_sizes:
            size = min(self.valid_sizes, key=lambda x: abs(x - size))

        if max_results is None:
            max_results = 25
        max_results = min(max_results, 100)  # NOTE: For traffic reasons, limit to 100 results

        results = []
        start = 0

        while len(results) < max_results:
            params = {
                "searchtype": "all",
                "query": query,
                "abstracts": "show",
                "order": "",
                "size": str(size),
                "start": str(start),
            }

            try:
                # A timeout keeps a stalled connection from hanging the caller.
                response = requests.get(self.base_url, params=params, timeout=30)
                # Surface HTTP errors instead of parsing an error page.
                response.raise_for_status()
                soup = BeautifulSoup(response.content, "html.parser")

                papers = soup.find_all("li", class_="arxiv-result")  # type: ignore
                if not papers:
                    break

                for paper in papers:
                    if len(results) >= max_results:
                        break
                    results.append(self._parse_paper(paper))

                start += size

            except Exception as e:
                logging.error(f"Error searching arXiv: {e}")
                break

        return results[:max_results]

    def get_metadata(self):
        """
        Returns the metadata for the _Searcher_Tool.

        Returns:
            dict: A dictionary containing the tool's metadata.
        """
        metadata = super().get_metadata()
        return metadata
143
+
144
+
145
if __name__ == "__main__":
    # Manual smoke test: print the tool metadata, then run one search.
    import json

    print("ArXiv Search Tool Test")

    tool = _Searcher_Tool()

    metadata = tool.get_metadata()
    print("Tool Metadata:", metadata, sep="\n")

    # Sample query for searching arXiv (left empty here).
    query = ""
    try:
        execution = tool.execute(query=query, size=50, max_results=10)
        print("\n==>> Execution:")
        print(json.dumps(execution, indent=4))  # Pretty print JSON
        print("\n==>> Search Results:")
        for position, entry in enumerate(execution, start=1):
            print(f"{position}. {entry['title']}")
            print(f" Authors: {entry['authors']}")
            print(f" Abstract: {entry['abstract'][:2000]}")
            print(f" Link: {entry['link']}")
            print()
    except Exception as e:
        print(f"Execution failed: {e}")

    print("Done!")
@@ -0,0 +1,102 @@
1
+ """
2
+ Arxiv 论文搜索工具 (Real Implementation)
3
+ """
4
+
5
+ import asyncio
6
+ import logging
7
+ import urllib.parse
8
+ from typing import Any
9
+
10
+ import aiohttp
11
+ import feedparser
12
+
13
+ from sage.libs.foundation.tools.tool import BaseTool
14
+
15
+ logger = logging.getLogger(__name__)
16
+
17
+
18
class ArxivSearcher(BaseTool):
    """Arxiv academic paper search tool backed by the arXiv export API."""

    def __init__(self):
        super().__init__(
            tool_name="arxiv_searcher",
            tool_description="Search Arxiv for academic papers. Returns title, authors, summary, and link.",
            input_types=["str"],
            output_type="list",
            demo_commands=["search for transformer papers", "find papers about LLM agents"],
            require_llm_engine=False,
        )
        self.base_url = "http://export.arxiv.org/api/query"

    async def execute(self, query: str, max_results: int = 5) -> list[dict[str, Any]]:
        """
        Execute Arxiv search.

        Args:
            query: Free-text query, sent as ``all:<query>``.
            max_results: Maximum number of entries requested from the API.

        Returns:
            One dict per feed entry with ``title``, ``authors``, ``summary``,
            ``published``, ``link`` and ``pdf_link``. An empty list is returned
            on a non-200 response or on any error.
        """
        logger.info(f"Searching Arxiv for: {query}")

        # Construct API query
        # search_query=all:electron&start=0&max_results=10
        params = {
            "search_query": f"all:{query}",
            "start": 0,
            "max_results": max_results,
            "sortBy": "relevance",
            "sortOrder": "descending",
        }

        url = f"{self.base_url}?{urllib.parse.urlencode(params)}"

        try:
            # Bound the whole request so a stalled connection cannot hang the caller.
            timeout = aiohttp.ClientTimeout(total=30)
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.get(url) as response:
                    if response.status != 200:
                        logger.error(f"Arxiv API failed with status {response.status}")
                        return []

                    content = await response.text()

            # Parse with feedparser
            feed = feedparser.parse(content)

            results = []
            for entry in feed.entries:
                # Use ``.get`` throughout: attribute access on a missing feed
                # field raises AttributeError, which would discard every result
                # (e.g. links that carry no ``title`` attribute).
                paper = {
                    "title": entry.get("title", "").replace("\n", " ").strip(),
                    "authors": [author.get("name", "") for author in entry.get("authors", [])],
                    "summary": entry.get("summary", "").replace("\n", " ").strip(),
                    "published": entry.get("published"),
                    "link": entry.get("link"),
                    "pdf_link": next(
                        (
                            link.get("href")
                            for link in entry.get("links", [])
                            if link.get("title") == "pdf"
                        ),
                        None,
                    ),
                }
                results.append(paper)

            logger.info(f"Found {len(results)} papers")
            return results

        except Exception as e:
            logger.error(f"Arxiv search failed: {e}")
            return []

    @staticmethod
    def _run_sync(coro):
        """Run ``coro`` to completion from synchronous code and return its result.

        ``asyncio.run`` cannot be used in a thread that already has a running
        event loop, so in that case the coroutine is executed on a fresh loop
        in a short-lived worker thread while this thread waits for the result.
        """
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop in this thread: the simple path is safe.
            return asyncio.run(coro)

        import concurrent.futures

        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
            return pool.submit(asyncio.run, coro).result()

    def call(self, arguments: dict) -> Any:
        """Sync wrapper for MCP.

        Reads ``query`` (required) and ``max_results`` (optional, default 5)
        from ``arguments``. Returns an empty list when no query is given.
        """
        query = arguments.get("query")
        if not query:
            return []

        max_results = arguments.get("max_results", 5)
        return self._run_sync(self.execute(query, max_results=max_results))
@@ -0,0 +1,105 @@
1
+ """
2
+ DuckDuckGo web search tool (no API key required).
3
+ """
4
+
5
+ from __future__ import annotations
6
+
7
+ import asyncio
8
+ import logging
9
+ from typing import Any
10
+
11
+ import aiohttp
12
+ from bs4 import BeautifulSoup
13
+ from pydantic import BaseModel, Field
14
+
15
+ from sage.libs.foundation.tools.tool import BaseTool
16
+
17
+ logger = logging.getLogger(__name__)
18
+
19
+
20
class DuckDuckGoSearchInput(BaseModel):
    """Validated arguments for a DuckDuckGo search: a query and a result limit (1-20)."""

    query: str = Field(default=..., description="Search query text")
    max_results: int = Field(
        default=5,
        description="Number of results to return",
        ge=1,
        le=20,
    )
23
+
24
+
25
class DuckDuckGoSearcher(BaseTool):
    """Simple HTML-based DuckDuckGo searcher.

    Uses the public HTML endpoint (no API key) and extracts title/link/snippet.
    Intended for lightweight research fallback when no commercial search API is configured.
    """

    def __init__(self):
        super().__init__(
            tool_name="duckduckgo_search",
            tool_description="Search the web via DuckDuckGo (HTML endpoint). Returns title, link, and snippet.",
            input_types={"query": "str - search query", "max_results": "int - number of results"},
            output_type="list",
            demo_commands=[
                "search for latest vector database papers",
                "find recent ML system posts",
            ],
            require_llm_engine=False,
        )

    async def execute(self, query: str, max_results: int = 5) -> list[dict[str, Any]]:
        """Search DuckDuckGo and return up to ``max_results`` hits.

        Each hit is a dict with ``title``, ``link``, ``content`` (the snippet)
        and ``source`` (always ``"duckduckgo"``). Results without a link are
        skipped. An empty list is returned on a non-200 response or on any
        request error.
        """
        url = "https://duckduckgo.com/html"
        params = {"q": query, "kl": "us-en"}

        try:
            # ClientTimeout is the supported way to bound a request in aiohttp.
            timeout = aiohttp.ClientTimeout(total=15)
            async with aiohttp.ClientSession(timeout=timeout) as session:
                async with session.post(url, data=params) as resp:
                    if resp.status != 200:
                        logger.warning("DuckDuckGo returned status %s", resp.status)
                        return []
                    html = await resp.text()
        except Exception as exc:  # noqa: BLE001
            logger.error("DuckDuckGo search failed: %s", exc)
            return []

        soup = BeautifulSoup(html, "html.parser")
        results: list[dict[str, Any]] = []

        for result in soup.select("div.result"):
            if len(results) >= max_results:
                break

            link_tag = result.select_one("a.result__a")
            snippet_tag = result.select_one("a.result__snippet") or result.select_one(
                "div.result__snippet"
            )

            title = link_tag.get_text(strip=True) if link_tag else ""
            href = link_tag.get("href") if link_tag else ""
            snippet = snippet_tag.get_text(strip=True) if snippet_tag else ""

            if not href:
                continue

            results.append(
                {
                    "title": title,
                    "link": href,
                    "content": snippet,
                    "source": "duckduckgo",
                }
            )

        return results

    @staticmethod
    def _run_sync(coro):
        """Run ``coro`` to completion from synchronous code and return its result.

        ``asyncio.run`` cannot be used in a thread that already has a running
        event loop, so in that case the coroutine is executed on a fresh loop
        in a short-lived worker thread while this thread waits for the result.
        """
        try:
            asyncio.get_running_loop()
        except RuntimeError:
            # No loop in this thread: the simple path is safe.
            return asyncio.run(coro)

        import concurrent.futures

        with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
            return pool.submit(asyncio.run, coro).result()

    def call(self, arguments: dict) -> Any:
        """Sync wrapper used by MCP/AgentRuntime.

        Reads ``query`` (required) and ``max_results`` (optional, default 5)
        from ``arguments``. Returns an empty list when no query is given.
        """
        query = arguments.get("query")
        if not query:
            return []

        max_results = arguments.get("max_results", 5)
        return self._run_sync(self.execute(query, max_results=max_results))