isage-middleware 0.2.4.3__cp311-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isage_middleware-0.2.4.3.dist-info/METADATA +266 -0
- isage_middleware-0.2.4.3.dist-info/RECORD +94 -0
- isage_middleware-0.2.4.3.dist-info/WHEEL +5 -0
- isage_middleware-0.2.4.3.dist-info/top_level.txt +1 -0
- sage/middleware/__init__.py +59 -0
- sage/middleware/_version.py +6 -0
- sage/middleware/components/__init__.py +30 -0
- sage/middleware/components/extensions_compat.py +141 -0
- sage/middleware/components/sage_db/__init__.py +116 -0
- sage/middleware/components/sage_db/backend.py +136 -0
- sage/middleware/components/sage_db/service.py +15 -0
- sage/middleware/components/sage_flow/__init__.py +76 -0
- sage/middleware/components/sage_flow/python/__init__.py +14 -0
- sage/middleware/components/sage_flow/python/micro_service/__init__.py +4 -0
- sage/middleware/components/sage_flow/python/micro_service/sage_flow_service.py +88 -0
- sage/middleware/components/sage_flow/python/sage_flow.py +30 -0
- sage/middleware/components/sage_flow/service.py +14 -0
- sage/middleware/components/sage_mem/__init__.py +83 -0
- sage/middleware/components/sage_sias/__init__.py +59 -0
- sage/middleware/components/sage_sias/continual_learner.py +184 -0
- sage/middleware/components/sage_sias/coreset_selector.py +302 -0
- sage/middleware/components/sage_sias/types.py +94 -0
- sage/middleware/components/sage_tsdb/__init__.py +81 -0
- sage/middleware/components/sage_tsdb/python/__init__.py +21 -0
- sage/middleware/components/sage_tsdb/python/_sage_tsdb.pyi +17 -0
- sage/middleware/components/sage_tsdb/python/algorithms/__init__.py +17 -0
- sage/middleware/components/sage_tsdb/python/algorithms/base.py +51 -0
- sage/middleware/components/sage_tsdb/python/algorithms/out_of_order_join.py +248 -0
- sage/middleware/components/sage_tsdb/python/algorithms/window_aggregator.py +296 -0
- sage/middleware/components/sage_tsdb/python/micro_service/__init__.py +7 -0
- sage/middleware/components/sage_tsdb/python/micro_service/sage_tsdb_service.py +365 -0
- sage/middleware/components/sage_tsdb/python/sage_tsdb.py +523 -0
- sage/middleware/components/sage_tsdb/service.py +17 -0
- sage/middleware/components/vector_stores/__init__.py +25 -0
- sage/middleware/components/vector_stores/chroma.py +483 -0
- sage/middleware/components/vector_stores/chroma_adapter.py +185 -0
- sage/middleware/components/vector_stores/milvus.py +677 -0
- sage/middleware/operators/__init__.py +56 -0
- sage/middleware/operators/agent/__init__.py +24 -0
- sage/middleware/operators/agent/planning/__init__.py +5 -0
- sage/middleware/operators/agent/planning/llm_adapter.py +41 -0
- sage/middleware/operators/agent/planning/planner_adapter.py +98 -0
- sage/middleware/operators/agent/planning/router.py +107 -0
- sage/middleware/operators/agent/runtime.py +296 -0
- sage/middleware/operators/agentic/__init__.py +41 -0
- sage/middleware/operators/agentic/config.py +254 -0
- sage/middleware/operators/agentic/planning_operator.py +125 -0
- sage/middleware/operators/agentic/refined_searcher.py +132 -0
- sage/middleware/operators/agentic/runtime.py +241 -0
- sage/middleware/operators/agentic/timing_operator.py +125 -0
- sage/middleware/operators/agentic/tool_selection_operator.py +127 -0
- sage/middleware/operators/context/__init__.py +17 -0
- sage/middleware/operators/context/critic_evaluation.py +16 -0
- sage/middleware/operators/context/model_context.py +565 -0
- sage/middleware/operators/context/quality_label.py +12 -0
- sage/middleware/operators/context/search_query_results.py +61 -0
- sage/middleware/operators/context/search_result.py +42 -0
- sage/middleware/operators/context/search_session.py +79 -0
- sage/middleware/operators/filters/__init__.py +26 -0
- sage/middleware/operators/filters/context_sink.py +387 -0
- sage/middleware/operators/filters/context_source.py +376 -0
- sage/middleware/operators/filters/evaluate_filter.py +83 -0
- sage/middleware/operators/filters/tool_filter.py +74 -0
- sage/middleware/operators/llm/__init__.py +18 -0
- sage/middleware/operators/llm/sagellm_generator.py +432 -0
- sage/middleware/operators/rag/__init__.py +147 -0
- sage/middleware/operators/rag/arxiv.py +331 -0
- sage/middleware/operators/rag/chunk.py +13 -0
- sage/middleware/operators/rag/document_loaders.py +23 -0
- sage/middleware/operators/rag/evaluate.py +658 -0
- sage/middleware/operators/rag/generator.py +340 -0
- sage/middleware/operators/rag/index_builder/__init__.py +48 -0
- sage/middleware/operators/rag/index_builder/builder.py +363 -0
- sage/middleware/operators/rag/index_builder/manifest.py +101 -0
- sage/middleware/operators/rag/index_builder/storage.py +131 -0
- sage/middleware/operators/rag/pipeline.py +46 -0
- sage/middleware/operators/rag/profiler.py +59 -0
- sage/middleware/operators/rag/promptor.py +400 -0
- sage/middleware/operators/rag/refiner.py +231 -0
- sage/middleware/operators/rag/reranker.py +364 -0
- sage/middleware/operators/rag/retriever.py +1308 -0
- sage/middleware/operators/rag/searcher.py +37 -0
- sage/middleware/operators/rag/types.py +28 -0
- sage/middleware/operators/rag/writer.py +80 -0
- sage/middleware/operators/tools/__init__.py +71 -0
- sage/middleware/operators/tools/arxiv_paper_searcher.py +175 -0
- sage/middleware/operators/tools/arxiv_searcher.py +102 -0
- sage/middleware/operators/tools/duckduckgo_searcher.py +105 -0
- sage/middleware/operators/tools/image_captioner.py +104 -0
- sage/middleware/operators/tools/nature_news_fetcher.py +224 -0
- sage/middleware/operators/tools/searcher_tool.py +514 -0
- sage/middleware/operators/tools/text_detector.py +185 -0
- sage/middleware/operators/tools/url_text_extractor.py +104 -0
- sage/middleware/py.typed +2 -0
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import os
|
|
2
|
+
import time
|
|
3
|
+
|
|
4
|
+
from sage.libs.foundation.tools.tool import BaseTool
|
|
5
|
+
|
|
6
|
+
try:
|
|
7
|
+
from isagellm import UnifiedInferenceClient
|
|
8
|
+
except ImportError:
|
|
9
|
+
UnifiedInferenceClient = None # Optional: isagellm not installed
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class ImageCaptioner(BaseTool):
    """Tool that asks an LLM (via isagellm's ``UnifiedInferenceClient``) to caption an image.

    NOTE(review): the chat message only embeds the image *path* as text — the
    image bytes are never sent to the client; confirm the backend resolves the
    path itself, otherwise this captions blind.
    """

    def __init__(self, model_name: str = "meta-llama/Llama-2-13b-chat-hf"):
        """Register tool metadata and remember which model to request.

        Args:
            model_name: Identifier of the chat model used for captioning.
        """
        super().__init__(
            tool_name="image_captioner",
            tool_description="A tool that can generate captions for images ",
            input_types={
                "image_path": "The path to the image to caption",
                "prompt": "The prompt to generate the caption",
            },
            demo_commands=[
                {
                    "command": 'execution = tool.execute(image="path/to/image.png")',
                    "description": "Generate a caption for an image using the default prompt and model.",
                },
                {
                    "command": 'execution = tool.execute(image="path/to/image.png", prompt="A beautiful landscape")',
                    "description": "Generate a caption for an image using a custom prompt and model.",
                },
            ],
            require_llm_engine=True,  # This tool requires an LLM engine
        )
        # Store additional metadata and model configuration as instance variables
        self.tool_version = "1.0.0"
        self.limitation = "The Image_Captioner_Tool provides general image descriptions but has limitations: 1) May make mistakes in complex scenes, counting, attribute detection, and understanding object relationships. 2) Might not generate comprehensive captions, especially for images with multiple objects or abstract concepts. 3) Performance varies with image complexity. 4) Struggles with culturally specific or domain-specific content. 5) May overlook details or misinterpret object relationships. For precise descriptions, consider: using it with other tools for context/verification, as an initial step before refinement, or in multi-step processes for ambiguity resolution. Verify critical information with specialized tools or human expertise when necessary."
        self.model_name = model_name
        print(f"ImageCaptioner initialized with model: {model_name}")

    def execute(self, image_path: str, prompt: str | None = None):
        """Generate a caption for the image at ``image_path``.

        Args:
            image_path: Path to the image file to caption.
            prompt: Optional custom user prompt. Defaults to a generic caption
                request. (New optional parameter — matches the ``prompt`` input
                type and demo commands already advertised in ``__init__``.)

        Returns:
            The client's chat response on success, or ``None`` if any error
            occurred (errors are printed rather than raised, preserving the
            original best-effort contract).
        """
        try:
            # Bug fix: the original called UnifiedInferenceClient.create()
            # unconditionally, raising an opaque AttributeError when the
            # optional isagellm dependency is missing. Fail with a clear
            # message instead (still swallowed into the None return below).
            if UnifiedInferenceClient is None:
                raise RuntimeError(
                    "isagellm is not installed; UnifiedInferenceClient is unavailable."
                )
            if not self.model_name:
                raise ValueError(
                    "Model name is not set. Please set the model name using set_model_name() before executing the tool."
                )

            # Construct the messages parameter for UnifiedInferenceClient.
            user_content = (
                prompt
                if prompt is not None
                else f"Generate a caption for the image at path: {image_path}"
            )
            messages = [
                {"role": "system", "content": "You are an image captioning assistant."},
                {"role": "user", "content": user_content},
            ]

            # Use auto-detection for best available LLM service
            client = UnifiedInferenceClient.create()

            # Retry mechanism for transient connection errors.
            max_retries = 5
            retry_delay = 3  # seconds

            for attempt in range(max_retries):
                try:
                    return client.chat(messages)
                except ConnectionError as e:
                    print(f"Connection error on attempt {attempt + 1}: {e}")
                    if attempt < max_retries - 1:
                        print(f"Retrying in {retry_delay} seconds...")
                        time.sleep(retry_delay)
                    else:
                        # Out of retries: surface to the outer handler.
                        raise
        except Exception as e:
            print(f"Error in ImageCaptioner: {e}")
            return None
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
if __name__ == "__main__":
    import json

    # Demo: caption the bundled example image with the default model.
    here = os.path.dirname(os.path.abspath(__file__))

    captioner = ImageCaptioner(model_name="meta-llama/Llama-2-13b-chat-hf")

    # Show the tool's registered metadata before running it.
    print(captioner.get_metadata())

    # Resolve the example image relative to this script's directory.
    target = os.path.join(here, "examples/baseball.png")

    try:
        caption = captioner.execute(image_path=target)
        print("Generated Caption:")
        print(json.dumps(caption, indent=4))
    except Exception as e:
        print(f"Execution failed: {e}")

    print("Done!")
|
|
@@ -0,0 +1,224 @@
|
|
|
1
|
+
import logging
|
|
2
|
+
import os
|
|
3
|
+
import random
|
|
4
|
+
import time
|
|
5
|
+
|
|
6
|
+
import requests
|
|
7
|
+
from bs4 import BeautifulSoup, Tag
|
|
8
|
+
|
|
9
|
+
from sage.libs.foundation.tools.tool import BaseTool
|
|
10
|
+
|
|
11
|
+
# Initialize logger
|
|
12
|
+
logger = logging.getLogger(__name__)
|
|
13
|
+
logging.basicConfig(level=logging.INFO)
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class Nature_News_Fetcher_Tool(BaseTool):
    """Scrapes the latest news articles from nature.com's article listing pages."""

    def __init__(self):
        super().__init__(
            tool_name="Nature_News_Fetcher_Tool",
            tool_description="A tool that fetches the latest news articles from Nature.",
            input_types={
                "num_articles": "int - The number of articles to fetch (default: 100).",
                "max_pages": "int - The maximum number of pages to fetch (default: 5).",
            },
            output_type="list - A list of dictionaries containing information about the latest Nature news articles.",
            demo_commands=[
                {
                    "command": "execution = tool.execute()",
                    "description": "Fetch the latest 100 news articles from Nature.",
                },
                {
                    "command": "execution = tool.execute(num_articles=50, max_pages=3)",
                    "description": "Fetch the latest 50 news articles from Nature, searching up to 3 pages.",
                },
            ],
        )
        self.tool_version = "1.0.0"
        self.base_url = "https://www.nature.com/nature/articles"
        # Delay between page fetches; tests may override this to speed runs up.
        self.sleep_time = 1
        # Per-request timeout in seconds. Bug fix: requests.get() previously
        # had no timeout, so a stalled server could hang the tool forever.
        self.request_timeout = 30

    def fetch_page(self, page_number):
        """
        Fetches a single page of news articles from Nature's website.

        Parameters:
            page_number (int): The page number to fetch.

        Returns:
            str: The HTML content of the page.

        Raises:
            requests.exceptions.RequestException: on network failure, timeout,
                or non-2xx status (via raise_for_status).
        """
        params = {
            "searchType": "journalSearch",
            "sort": "PubDate",
            "type": "news",
            "page": str(page_number),
        }
        # Rotate a small pool of desktop User-Agent strings to look like a
        # regular browser rather than a scripted client.
        user_agents = [
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
            "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
        ]
        headers = {"User-Agent": random.choice(user_agents)}
        response = requests.get(
            self.base_url,
            params=params,
            headers=headers,
            timeout=self.request_timeout,  # bound each request (bug fix)
        )
        response.raise_for_status()
        return response.text

    def parse_articles(self, html_content):
        """
        Parses the HTML content and extracts article information.

        Parameters:
            html_content (str): The HTML content of the page.

        Returns:
            list: A list of dictionaries containing article information
                (title, url, description, authors, date, image_url). Fields
                that cannot be located fall back to "No ... found" strings.
        """
        soup = BeautifulSoup(html_content, "html.parser")
        # All article cards live under this section; if the page layout
        # changed (or an error page was returned) we get nothing back.
        articles_section = soup.find("section", id="new-article-list")
        if not isinstance(articles_section, Tag):
            return []

        articles = []
        for article in articles_section.find_all("article", class_="c-card"):  # type: ignore
            if not isinstance(article, Tag):
                continue

            title_elem = article.find("h3", class_="c-card__title")  # type: ignore
            title = title_elem.text.strip() if isinstance(title_elem, Tag) else "No title found"

            # The article link is nested inside the title heading.
            url_elem = title_elem.find("a") if isinstance(title_elem, Tag) else None  # type: ignore
            url = (
                "https://www.nature.com" + str(url_elem["href"])
                if isinstance(url_elem, Tag) and url_elem.has_attr("href")
                else "No URL found"
            )

            description_elem = article.find("div", {"data-test": "article-description"})  # type: ignore
            description = (
                description_elem.text.strip()
                if isinstance(description_elem, Tag)
                else "No description available"
            )

            authors_elem = article.find("ul", {"data-test": "author-list"})  # type: ignore
            authors = (
                [
                    author.text.strip()
                    for author in authors_elem.find_all("li")
                    if isinstance(author, Tag)
                ]
                if isinstance(authors_elem, Tag)
                else ["No authors found"]
            )

            date_elem = article.find("time")  # type: ignore
            date = (
                date_elem["datetime"]
                if isinstance(date_elem, Tag) and date_elem.has_attr("datetime")
                else "No date found"
            )

            image_elem = article.find("img")  # type: ignore
            image_url = (
                image_elem["src"]
                if isinstance(image_elem, Tag) and image_elem.has_attr("src")
                else "No image found"
            )

            articles.append(
                {
                    "title": title,
                    "url": url,
                    "description": description,
                    "authors": authors,
                    "date": date,
                    "image_url": image_url,
                }
            )

        return articles

    def execute(self, num_articles=100, max_pages=5):
        """
        Fetches the latest news articles from Nature's website.

        Parameters:
            num_articles (int): The number of articles to fetch.
            max_pages (int): The maximum number of pages to fetch.

        Returns:
            list: A list of dictionaries containing article information, or a
                single-element list of {"error": ...} on failure (the tool
                never raises — callers rely on the best-effort contract).
        """
        all_articles = []
        page_number = 1

        try:
            while len(all_articles) < num_articles and page_number <= max_pages:
                html_content = self.fetch_page(page_number)
                page_articles = self.parse_articles(html_content)

                if not page_articles:
                    logger.info(f"No articles found on page {page_number}. Stopping fetch.")
                    break  # No more articles found

                all_articles.extend(page_articles)
                page_number += 1
                # Only sleep when another page will actually be fetched.
                if len(all_articles) < num_articles and page_number <= max_pages:
                    time.sleep(self.sleep_time)  # Be polite to the server

            return all_articles[:num_articles]
        except requests.exceptions.RequestException as e:
            logger.error(f"Network error occurred: {e}")
            return [{"error": f"Network error: {str(e)}"}]
        except Exception as e:
            logger.error(f"An unexpected error occurred: {e}")
            return [{"error": f"Unexpected error: {str(e)}"}]

    def get_metadata(self):
        """
        Returns the metadata for the Nature_News_Fetcher_Tool.

        Returns:
            dict: A dictionary containing the tool's metadata (empty if the
                base class provides no get_metadata implementation).
        """
        if hasattr(super(), "get_metadata"):
            metadata = super().get_metadata()
        else:
            metadata = {}
        return metadata
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
if __name__ == "__main__":
    import json

    # Demo: fetch a handful of recent Nature news articles and print them.
    here = os.path.dirname(os.path.abspath(__file__))

    fetcher = Nature_News_Fetcher_Tool()

    # Show the tool's registered metadata before running it.
    print(fetcher.get_metadata())

    # Fetch just 10 articles from a single page for demonstration purposes.
    try:
        results = fetcher.execute(num_articles=10, max_pages=1)
        print(json.dumps(results, indent=4))
        print("\nExecution Result:")
        print(f"Number of articles fetched: {len(results)}")
        print("\nSample articles:")
        for idx, entry in enumerate(results[:10], 1):
            print(f"\n{idx}. Title: {entry['title']}")
            print(f"   URL: {entry['url']}")
            print(f"   Description: {entry['description'][:100]}...")  # Show first 100 characters
            print(f"   Authors: {', '.join(entry['authors'])}")
            print(f"   Date: {entry['date']}")
            print(f"   Image URL: {entry['image_url']}")
    except Exception as e:
        print(f"Execution failed: {e}")

    print("Done!")
|