isage-middleware 0.2.4.3__cp311-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- isage_middleware-0.2.4.3.dist-info/METADATA +266 -0
- isage_middleware-0.2.4.3.dist-info/RECORD +94 -0
- isage_middleware-0.2.4.3.dist-info/WHEEL +5 -0
- isage_middleware-0.2.4.3.dist-info/top_level.txt +1 -0
- sage/middleware/__init__.py +59 -0
- sage/middleware/_version.py +6 -0
- sage/middleware/components/__init__.py +30 -0
- sage/middleware/components/extensions_compat.py +141 -0
- sage/middleware/components/sage_db/__init__.py +116 -0
- sage/middleware/components/sage_db/backend.py +136 -0
- sage/middleware/components/sage_db/service.py +15 -0
- sage/middleware/components/sage_flow/__init__.py +76 -0
- sage/middleware/components/sage_flow/python/__init__.py +14 -0
- sage/middleware/components/sage_flow/python/micro_service/__init__.py +4 -0
- sage/middleware/components/sage_flow/python/micro_service/sage_flow_service.py +88 -0
- sage/middleware/components/sage_flow/python/sage_flow.py +30 -0
- sage/middleware/components/sage_flow/service.py +14 -0
- sage/middleware/components/sage_mem/__init__.py +83 -0
- sage/middleware/components/sage_sias/__init__.py +59 -0
- sage/middleware/components/sage_sias/continual_learner.py +184 -0
- sage/middleware/components/sage_sias/coreset_selector.py +302 -0
- sage/middleware/components/sage_sias/types.py +94 -0
- sage/middleware/components/sage_tsdb/__init__.py +81 -0
- sage/middleware/components/sage_tsdb/python/__init__.py +21 -0
- sage/middleware/components/sage_tsdb/python/_sage_tsdb.pyi +17 -0
- sage/middleware/components/sage_tsdb/python/algorithms/__init__.py +17 -0
- sage/middleware/components/sage_tsdb/python/algorithms/base.py +51 -0
- sage/middleware/components/sage_tsdb/python/algorithms/out_of_order_join.py +248 -0
- sage/middleware/components/sage_tsdb/python/algorithms/window_aggregator.py +296 -0
- sage/middleware/components/sage_tsdb/python/micro_service/__init__.py +7 -0
- sage/middleware/components/sage_tsdb/python/micro_service/sage_tsdb_service.py +365 -0
- sage/middleware/components/sage_tsdb/python/sage_tsdb.py +523 -0
- sage/middleware/components/sage_tsdb/service.py +17 -0
- sage/middleware/components/vector_stores/__init__.py +25 -0
- sage/middleware/components/vector_stores/chroma.py +483 -0
- sage/middleware/components/vector_stores/chroma_adapter.py +185 -0
- sage/middleware/components/vector_stores/milvus.py +677 -0
- sage/middleware/operators/__init__.py +56 -0
- sage/middleware/operators/agent/__init__.py +24 -0
- sage/middleware/operators/agent/planning/__init__.py +5 -0
- sage/middleware/operators/agent/planning/llm_adapter.py +41 -0
- sage/middleware/operators/agent/planning/planner_adapter.py +98 -0
- sage/middleware/operators/agent/planning/router.py +107 -0
- sage/middleware/operators/agent/runtime.py +296 -0
- sage/middleware/operators/agentic/__init__.py +41 -0
- sage/middleware/operators/agentic/config.py +254 -0
- sage/middleware/operators/agentic/planning_operator.py +125 -0
- sage/middleware/operators/agentic/refined_searcher.py +132 -0
- sage/middleware/operators/agentic/runtime.py +241 -0
- sage/middleware/operators/agentic/timing_operator.py +125 -0
- sage/middleware/operators/agentic/tool_selection_operator.py +127 -0
- sage/middleware/operators/context/__init__.py +17 -0
- sage/middleware/operators/context/critic_evaluation.py +16 -0
- sage/middleware/operators/context/model_context.py +565 -0
- sage/middleware/operators/context/quality_label.py +12 -0
- sage/middleware/operators/context/search_query_results.py +61 -0
- sage/middleware/operators/context/search_result.py +42 -0
- sage/middleware/operators/context/search_session.py +79 -0
- sage/middleware/operators/filters/__init__.py +26 -0
- sage/middleware/operators/filters/context_sink.py +387 -0
- sage/middleware/operators/filters/context_source.py +376 -0
- sage/middleware/operators/filters/evaluate_filter.py +83 -0
- sage/middleware/operators/filters/tool_filter.py +74 -0
- sage/middleware/operators/llm/__init__.py +18 -0
- sage/middleware/operators/llm/sagellm_generator.py +432 -0
- sage/middleware/operators/rag/__init__.py +147 -0
- sage/middleware/operators/rag/arxiv.py +331 -0
- sage/middleware/operators/rag/chunk.py +13 -0
- sage/middleware/operators/rag/document_loaders.py +23 -0
- sage/middleware/operators/rag/evaluate.py +658 -0
- sage/middleware/operators/rag/generator.py +340 -0
- sage/middleware/operators/rag/index_builder/__init__.py +48 -0
- sage/middleware/operators/rag/index_builder/builder.py +363 -0
- sage/middleware/operators/rag/index_builder/manifest.py +101 -0
- sage/middleware/operators/rag/index_builder/storage.py +131 -0
- sage/middleware/operators/rag/pipeline.py +46 -0
- sage/middleware/operators/rag/profiler.py +59 -0
- sage/middleware/operators/rag/promptor.py +400 -0
- sage/middleware/operators/rag/refiner.py +231 -0
- sage/middleware/operators/rag/reranker.py +364 -0
- sage/middleware/operators/rag/retriever.py +1308 -0
- sage/middleware/operators/rag/searcher.py +37 -0
- sage/middleware/operators/rag/types.py +28 -0
- sage/middleware/operators/rag/writer.py +80 -0
- sage/middleware/operators/tools/__init__.py +71 -0
- sage/middleware/operators/tools/arxiv_paper_searcher.py +175 -0
- sage/middleware/operators/tools/arxiv_searcher.py +102 -0
- sage/middleware/operators/tools/duckduckgo_searcher.py +105 -0
- sage/middleware/operators/tools/image_captioner.py +104 -0
- sage/middleware/operators/tools/nature_news_fetcher.py +224 -0
- sage/middleware/operators/tools/searcher_tool.py +514 -0
- sage/middleware/operators/tools/text_detector.py +185 -0
- sage/middleware/operators/tools/url_text_extractor.py +104 -0
- sage/middleware/py.typed +2 -0
|
@@ -0,0 +1,104 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
import requests
|
|
4
|
+
from bs4 import BeautifulSoup
|
|
5
|
+
|
|
6
|
+
from sage.libs.foundation.tools.tool import BaseTool
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
class URL_Text_Extractor_Tool(BaseTool):
    """Tool that downloads a web page and returns its visible text.

    Failures are not raised to the caller: they are reported as a
    human-readable message placed where the extracted text would be.
    """

    def __init__(self):
        """Register the tool's name, description, I/O schema and demo commands."""
        super().__init__(
            tool_name="URL_Text_Extractor_Tool",
            tool_description="A tool that extracts all text from a given URL.",
            input_types={
                "url": "str - The URL from which to extract text.",
            },
            output_type="dict - A dictionary containing the extracted text and any error messages.",
            demo_commands=[
                {
                    "command": 'execution = tool.execute(url="https://example.com")',
                    "description": "Extract all text from the example.com website.",
                },
                {
                    "command": 'execution = tool.execute(url="https://en.wikipedia.org/wiki/Python_(programming_language)")',
                    "description": "Extract all text from the Wikipedia page about Python programming language.",
                },
            ],
        )
        self.tool_version = "1.0.0"

    def extract_text_from_url(self, url, timeout=30.0, max_chars=10000):
        """
        Extracts all text from the given URL.

        Parameters:
            url (str): The URL from which to extract text.
            timeout (float or tuple): Passed to ``requests.get`` as its
                ``timeout`` argument. Without a timeout an unresponsive
                server would block the caller indefinitely.
            max_chars (int or None): Maximum number of characters to keep
                from the extracted text. ``None`` keeps the whole text.

        Returns:
            str: The extracted text (truncated to ``max_chars``), or a
                message starting with "Error fetching URL:" /
                "Error extracting text:" when the download or the parsing
                fails.
        """
        # Rewrite arXiv PDF links to the matching "abs" path -- presumably so
        # that an HTML page is fetched instead of a PDF (confirm intent).
        url = url.replace("arxiv.org/pdf", "arxiv.org/abs")

        try:
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
            soup = BeautifulSoup(response.content, "html.parser")
            text = soup.get_text(separator="\n", strip=True)
            # Cap the size of what is handed back to the caller.
            return text[:max_chars]
        except requests.RequestException as e:
            # Network-level failures, HTTP error statuses and timeouts.
            return f"Error fetching URL: {str(e)}"
        except Exception as e:
            # Anything else (e.g. a parsing failure) is reported, not raised.
            return f"Error extracting text: {str(e)}"

    def execute(self, url):
        """
        Runs the tool on a single URL.

        Parameters:
            url (str): The URL from which to extract text.

        Returns:
            dict: ``{"url": ..., "extracted_text": ...}`` where ``url`` is the
                value passed in (before any arXiv rewrite) and
                ``extracted_text`` is the result of
                ``extract_text_from_url`` -- either the page text or an
                error message.
        """
        extracted_text = self.extract_text_from_url(url)
        return {"url": url, "extracted_text": extracted_text}

    def get_metadata(self):
        """
        Returns the metadata for the URL_Text_Extractor_Tool.

        Returns:
            dict: A dictionary containing the tool's metadata.
        """
        # Pure pass-through to the base class implementation.
        metadata = super().get_metadata()
        return metadata
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
if __name__ == "__main__":
    # Manual smoke test: runs only when this file is executed as a script.
    # It performs a real HTTP request against the sample URL below.
    # NOTE(review): the directory and file name mentioned in the string
    # below are not referenced anywhere else in this file -- confirm they
    # are still the right way to launch it.
    """
    Run the following commands in the terminal to test the script:

    cd octotools/tools/url_text_extractor
    python tool.py
    """

    import json

    # Directory holding this script (computed here, not used further down).
    script_dir = os.path.dirname(os.path.abspath(__file__))

    # Build the tool and show what it advertises about itself.
    tool = URL_Text_Extractor_Tool()
    metadata = tool.get_metadata()
    print(metadata)

    # Page used for the demonstration run.
    url = "https://intellistream.github.io/SAGE-Pub/get_start/install/"

    # Run the extraction, then print the result twice: once as JSON and
    # once field by field so the multi-line text is readable.
    try:
        execution = tool.execute(url=url)
        print("Execution Result:")
        print(json.dumps(execution, indent=4))
        for key, value in execution.items():
            print(f"{key}:\n{value}\n")
    except ValueError as e:
        print(f"Execution failed: {e}")

    print("Done!")
|
sage/middleware/py.typed
ADDED