noesium 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- noesium/agents/askura_agent/__init__.py +22 -0
- noesium/agents/askura_agent/askura_agent.py +480 -0
- noesium/agents/askura_agent/conversation.py +164 -0
- noesium/agents/askura_agent/extractor.py +175 -0
- noesium/agents/askura_agent/memory.py +14 -0
- noesium/agents/askura_agent/models.py +239 -0
- noesium/agents/askura_agent/prompts.py +202 -0
- noesium/agents/askura_agent/reflection.py +234 -0
- noesium/agents/askura_agent/summarizer.py +30 -0
- noesium/agents/askura_agent/utils.py +6 -0
- noesium/agents/deep_research/__init__.py +13 -0
- noesium/agents/deep_research/agent.py +398 -0
- noesium/agents/deep_research/prompts.py +84 -0
- noesium/agents/deep_research/schemas.py +42 -0
- noesium/agents/deep_research/state.py +54 -0
- noesium/agents/search/__init__.py +5 -0
- noesium/agents/search/agent.py +474 -0
- noesium/agents/search/state.py +28 -0
- noesium/core/__init__.py +1 -1
- noesium/core/agent/base.py +10 -2
- noesium/core/goalith/decomposer/llm_decomposer.py +1 -1
- noesium/core/llm/__init__.py +1 -1
- noesium/core/llm/base.py +2 -2
- noesium/core/llm/litellm.py +42 -21
- noesium/core/llm/llamacpp.py +25 -4
- noesium/core/llm/ollama.py +43 -22
- noesium/core/llm/openai.py +25 -5
- noesium/core/llm/openrouter.py +1 -1
- noesium/core/toolify/base.py +9 -2
- noesium/core/toolify/config.py +2 -2
- noesium/core/toolify/registry.py +21 -5
- noesium/core/tracing/opik_tracing.py +7 -7
- noesium/core/vector_store/__init__.py +2 -2
- noesium/core/vector_store/base.py +1 -1
- noesium/core/vector_store/pgvector.py +10 -13
- noesium/core/vector_store/weaviate.py +2 -1
- noesium/toolkits/__init__.py +1 -0
- noesium/toolkits/arxiv_toolkit.py +310 -0
- noesium/toolkits/audio_aliyun_toolkit.py +441 -0
- noesium/toolkits/audio_toolkit.py +370 -0
- noesium/toolkits/bash_toolkit.py +332 -0
- noesium/toolkits/document_toolkit.py +454 -0
- noesium/toolkits/file_edit_toolkit.py +552 -0
- noesium/toolkits/github_toolkit.py +395 -0
- noesium/toolkits/gmail_toolkit.py +575 -0
- noesium/toolkits/image_toolkit.py +425 -0
- noesium/toolkits/memory_toolkit.py +398 -0
- noesium/toolkits/python_executor_toolkit.py +334 -0
- noesium/toolkits/search_toolkit.py +451 -0
- noesium/toolkits/serper_toolkit.py +623 -0
- noesium/toolkits/tabular_data_toolkit.py +537 -0
- noesium/toolkits/user_interaction_toolkit.py +365 -0
- noesium/toolkits/video_toolkit.py +168 -0
- noesium/toolkits/wikipedia_toolkit.py +420 -0
- noesium-0.2.1.dist-info/METADATA +253 -0
- {noesium-0.1.0.dist-info → noesium-0.2.1.dist-info}/RECORD +59 -23
- {noesium-0.1.0.dist-info → noesium-0.2.1.dist-info}/licenses/LICENSE +1 -1
- noesium-0.1.0.dist-info/METADATA +0 -525
- {noesium-0.1.0.dist-info → noesium-0.2.1.dist-info}/WHEEL +0 -0
- {noesium-0.1.0.dist-info → noesium-0.2.1.dist-info}/top_level.txt +0 -0
noesium/toolkits/arxiv_toolkit.py

@@ -0,0 +1,310 @@
"""
ArXiv toolkit for academic paper search and download.

Provides tools for searching and downloading academic papers from arXiv.org
using the arXiv API with advanced query capabilities.
"""

import os
import re
from typing import Any, Callable, Dict, Generator, List, Optional

from noesium.core.toolify.base import AsyncBaseToolkit
from noesium.core.toolify.config import ToolkitConfig
from noesium.core.toolify.registry import register_toolkit
from noesium.core.utils.logging import get_logger

logger = get_logger(__name__)

try:
    import arxiv

    ARXIV_AVAILABLE = True
except ImportError:
    arxiv = None
    ARXIV_AVAILABLE = False


@register_toolkit("arxiv")
class ArxivToolkit(AsyncBaseToolkit):
    """
    Toolkit for searching and downloading academic papers from arXiv.

    This toolkit provides access to the arXiv API for searching academic papers
    by various criteria including title, author, abstract, and date ranges.
    It also supports downloading PDFs of papers.

    Features:
    - Advanced search with filtering and operators
    - Paper metadata extraction
    - PDF download capabilities
    - Configurable result limits
    - Sort by relevance or other criteria

    Required dependency: arxiv
    Install with: pip install arxiv
    """

    def __init__(self, config: Optional[ToolkitConfig] = None):
        """
        Initialize the ArXiv toolkit.

        Args:
            config: Toolkit configuration

        Raises:
            ImportError: If the arxiv package is not installed
        """
        super().__init__(config)

        if not ARXIV_AVAILABLE:
            raise ImportError("arxiv package is required for ArxivToolkit. Install with: pip install arxiv")

        # Initialize the arXiv API client
        self.client = arxiv.Client()

        # Configuration defaults, overridable via the toolkit config dict
        self.default_max_results = self.config.config.get("default_max_results", 5)
        self.default_sort_by = self.config.config.get("default_sort_by", "Relevance")
        self.default_download_dir = self.config.config.get("default_download_dir", "./arxiv_papers")

    def _get_search_results(
        self,
        query: str,
        paper_ids: Optional[List[str]] = None,
        max_results: Optional[int] = None,
        sort_by: Optional[str] = None,
    ) -> Generator:
        """
        Get search results from the arXiv API.

        Args:
            query: Search query string
            paper_ids: List of specific arXiv paper IDs
            max_results: Maximum number of results
            sort_by: Sort criterion (Relevance, LastUpdatedDate, SubmittedDate)

        Returns:
            Generator of arxiv.Result objects
        """
        paper_ids = paper_ids or []
        max_results = max_results or self.default_max_results

        # Map sort criterion names to arxiv enum values
        sort_mapping = {
            "Relevance": arxiv.SortCriterion.Relevance,
            "LastUpdatedDate": arxiv.SortCriterion.LastUpdatedDate,
            "SubmittedDate": arxiv.SortCriterion.SubmittedDate,
        }

        sort_criterion = sort_mapping.get(sort_by or self.default_sort_by, arxiv.SortCriterion.Relevance)

        search_query = arxiv.Search(
            query=query,
            id_list=paper_ids,
            max_results=max_results,
            sort_by=sort_criterion,
        )

        return self.client.results(search_query)

    async def search_papers(
        self,
        query: str,
        paper_ids: Optional[List[str]] = None,
        max_results: Optional[int] = None,
        sort_by: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """
        Search for academic papers on arXiv using a query string and optional paper IDs.

        This tool provides comprehensive search capabilities for arXiv papers with
        advanced query syntax support and flexible filtering options.

        Advanced query syntax:
        - Field filtering: ti: (title), au: (author), abs: (abstract), all: (all fields)
        - Boolean operators: AND, OR, ANDNOT
        - Date ranges: submittedDate:[YYYYMMDDTTTT TO YYYYMMDDTTTT]
        - Categories: cat:cs.AI (computer science - artificial intelligence)

        Examples:
        - "au:LeCun AND ti:neural" - Papers by LeCun with "neural" in the title
        - "abs:transformer AND cat:cs.CL" - Papers about transformers in computational linguistics
        - "submittedDate:[20230101 TO 20240101]" - Papers from 2023

        Args:
            query: The search query string with optional advanced syntax
            paper_ids: List of specific arXiv paper IDs to search for
            max_results: Maximum number of search results to return (default: 5)
            sort_by: Sort criterion - "Relevance", "LastUpdatedDate", or "SubmittedDate"

        Returns:
            List of dictionaries containing paper information:
            - title: Paper title
            - published_date: Publication date (ISO format)
            - authors: List of author names
            - entry_id: arXiv entry ID
            - summary: Paper abstract/summary
            - pdf_url: Direct PDF download URL
            - categories: arXiv categories
            - doi: DOI if available
        """
        self.logger.info(f"Searching arXiv for: {query}")

        try:
            search_results = self._get_search_results(query, paper_ids, max_results, sort_by)
            papers_data = []

            for paper in search_results:
                # Extract author names
                authors = [author.name for author in paper.authors]

                paper_info = {
                    "title": paper.title.strip(),
                    # Use the submission date, as documented; paper.updated would
                    # reflect the latest revision instead.
                    "published_date": paper.published.date().isoformat(),
                    "authors": authors,
                    "entry_id": paper.entry_id,
                    "summary": paper.summary.strip(),
                    "pdf_url": paper.pdf_url,
                    "categories": list(paper.categories),
                    "doi": paper.doi,
                    "journal_ref": paper.journal_ref,
                    "comment": paper.comment,
                }

                papers_data.append(paper_info)

            self.logger.info(f"Found {len(papers_data)} papers")
            return papers_data

        except Exception as e:
            self.logger.error(f"arXiv search failed: {e}")
            raise

    async def download_papers(
        self,
        query: str,
        paper_ids: Optional[List[str]] = None,
        max_results: Optional[int] = None,
        output_dir: Optional[str] = None,
    ) -> str:
        """
        Download PDFs of academic papers from arXiv based on the provided query.

        This tool searches for papers using the specified query and downloads
        their PDF files to the specified directory. Files are saved with
        sanitized titles as filenames.

        Args:
            query: The search query string (supports advanced syntax)
            paper_ids: List of specific arXiv paper IDs to download
            max_results: Maximum number of papers to download (default: 5)
            output_dir: Directory to save downloaded PDFs (default: ./arxiv_papers)

        Returns:
            Status message indicating success or failure with details
        """
        output_dir = output_dir or self.default_download_dir
        max_results = max_results or self.default_max_results

        self.logger.info(f"Downloading papers for query: {query}")
        self.logger.info(f"Output directory: {output_dir}")

        try:
            # Create the output directory if it doesn't exist
            os.makedirs(output_dir, exist_ok=True)

            search_results = self._get_search_results(query, paper_ids, max_results)
            downloaded_count = 0
            failed_downloads = []

            for paper in search_results:
                try:
                    # Sanitize the title into a safe, length-limited filename
                    safe_title = re.sub(r"[^\w\s-]", "", paper.title)
                    safe_title = re.sub(r"[-\s]+", "-", safe_title)
                    filename = f"{safe_title[:100]}.pdf"

                    # Download the paper
                    paper.download_pdf(dirpath=output_dir, filename=filename)
                    downloaded_count += 1

                    self.logger.info(f"Downloaded: {filename}")

                except Exception as e:
                    error_msg = f"Failed to download '{paper.title}': {str(e)}"
                    failed_downloads.append(error_msg)
                    self.logger.warning(error_msg)

            # Prepare the result message
            result_msg = f"Successfully downloaded {downloaded_count} papers to {output_dir}"

            if failed_downloads:
                result_msg += f"\n\nFailed downloads ({len(failed_downloads)}):\n"
                result_msg += "\n".join(failed_downloads)

            return result_msg

        except Exception as e:
            error_msg = f"Download operation failed: {str(e)}"
            self.logger.error(error_msg)
            return error_msg

    async def get_paper_details(self, paper_id: str) -> Dict[str, Any]:
        """
        Get detailed information about a specific arXiv paper by ID.

        Args:
            paper_id: arXiv paper ID (e.g., "2301.07041" or "arxiv:2301.07041")

        Returns:
            Dictionary containing detailed paper information
        """
        self.logger.info(f"Getting details for paper: {paper_id}")

        try:
            # Clean the paper ID (remove the "arxiv:" prefix if present)
            clean_id = paper_id.replace("arxiv:", "")

            search_results = self._get_search_results("", paper_ids=[clean_id], max_results=1)

            for paper in search_results:
                return {
                    "title": paper.title.strip(),
                    "authors": [author.name for author in paper.authors],
                    "published_date": paper.published.isoformat() if paper.published else None,
                    "updated_date": paper.updated.isoformat() if paper.updated else None,
                    "entry_id": paper.entry_id,
                    "summary": paper.summary.strip(),
                    "pdf_url": paper.pdf_url,
                    "categories": list(paper.categories),
                    "primary_category": paper.primary_category,
                    "doi": paper.doi,
                    "journal_ref": paper.journal_ref,
                    "comment": paper.comment,
                    "links": [{"href": link.href, "title": link.title} for link in paper.links],
                }

            return {"error": f"Paper with ID '{paper_id}' not found"}

        except Exception as e:
            error_msg = f"Failed to get paper details: {str(e)}"
            self.logger.error(error_msg)
            return {"error": error_msg}

    async def get_tools_map(self) -> Dict[str, Callable]:
        """
        Get the mapping of tool names to their implementation functions.

        Returns:
            Dictionary mapping tool names to callable functions
        """
        return {
            "search_papers": self.search_papers,
            "download_papers": self.download_papers,
            "get_paper_details": self.get_paper_details,
        }