noesium 0.1.0__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59)
  1. noesium/agents/askura_agent/__init__.py +22 -0
  2. noesium/agents/askura_agent/askura_agent.py +480 -0
  3. noesium/agents/askura_agent/conversation.py +164 -0
  4. noesium/agents/askura_agent/extractor.py +175 -0
  5. noesium/agents/askura_agent/memory.py +14 -0
  6. noesium/agents/askura_agent/models.py +239 -0
  7. noesium/agents/askura_agent/prompts.py +202 -0
  8. noesium/agents/askura_agent/reflection.py +234 -0
  9. noesium/agents/askura_agent/summarizer.py +30 -0
  10. noesium/agents/askura_agent/utils.py +6 -0
  11. noesium/agents/deep_research/__init__.py +13 -0
  12. noesium/agents/deep_research/agent.py +398 -0
  13. noesium/agents/deep_research/prompts.py +84 -0
  14. noesium/agents/deep_research/schemas.py +42 -0
  15. noesium/agents/deep_research/state.py +54 -0
  16. noesium/agents/search/__init__.py +5 -0
  17. noesium/agents/search/agent.py +474 -0
  18. noesium/agents/search/state.py +28 -0
  19. noesium/core/__init__.py +1 -1
  20. noesium/core/agent/base.py +10 -2
  21. noesium/core/goalith/decomposer/llm_decomposer.py +1 -1
  22. noesium/core/llm/__init__.py +1 -1
  23. noesium/core/llm/base.py +2 -2
  24. noesium/core/llm/litellm.py +42 -21
  25. noesium/core/llm/llamacpp.py +25 -4
  26. noesium/core/llm/ollama.py +43 -22
  27. noesium/core/llm/openai.py +25 -5
  28. noesium/core/llm/openrouter.py +1 -1
  29. noesium/core/toolify/base.py +9 -2
  30. noesium/core/toolify/config.py +2 -2
  31. noesium/core/toolify/registry.py +21 -5
  32. noesium/core/tracing/opik_tracing.py +7 -7
  33. noesium/core/vector_store/__init__.py +2 -2
  34. noesium/core/vector_store/base.py +1 -1
  35. noesium/core/vector_store/pgvector.py +10 -13
  36. noesium/core/vector_store/weaviate.py +2 -1
  37. noesium/toolkits/__init__.py +1 -0
  38. noesium/toolkits/arxiv_toolkit.py +310 -0
  39. noesium/toolkits/audio_aliyun_toolkit.py +441 -0
  40. noesium/toolkits/audio_toolkit.py +370 -0
  41. noesium/toolkits/bash_toolkit.py +332 -0
  42. noesium/toolkits/document_toolkit.py +454 -0
  43. noesium/toolkits/file_edit_toolkit.py +552 -0
  44. noesium/toolkits/github_toolkit.py +395 -0
  45. noesium/toolkits/gmail_toolkit.py +575 -0
  46. noesium/toolkits/image_toolkit.py +425 -0
  47. noesium/toolkits/memory_toolkit.py +398 -0
  48. noesium/toolkits/python_executor_toolkit.py +334 -0
  49. noesium/toolkits/search_toolkit.py +451 -0
  50. noesium/toolkits/serper_toolkit.py +623 -0
  51. noesium/toolkits/tabular_data_toolkit.py +537 -0
  52. noesium/toolkits/user_interaction_toolkit.py +365 -0
  53. noesium/toolkits/video_toolkit.py +168 -0
  54. noesium/toolkits/wikipedia_toolkit.py +420 -0
  55. {noesium-0.1.0.dist-info → noesium-0.2.0.dist-info}/METADATA +56 -48
  56. {noesium-0.1.0.dist-info → noesium-0.2.0.dist-info}/RECORD +59 -23
  57. {noesium-0.1.0.dist-info → noesium-0.2.0.dist-info}/licenses/LICENSE +1 -1
  58. {noesium-0.1.0.dist-info → noesium-0.2.0.dist-info}/WHEEL +0 -0
  59. {noesium-0.1.0.dist-info → noesium-0.2.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,310 @@
1
+ """
2
+ ArXiv toolkit for academic paper search and download.
3
+
4
+ Provides tools for searching and downloading academic papers from arXiv.org
5
+ using the arXiv API with advanced query capabilities.
6
+ """
7
+
8
from typing import Any, Callable, Dict, Generator, List, Optional

from noesium.core.toolify.base import AsyncBaseToolkit
from noesium.core.toolify.config import ToolkitConfig
from noesium.core.toolify.registry import register_toolkit
from noesium.core.utils.logging import get_logger
14
+
15
+ logger = get_logger(__name__)
16
+
17
+ try:
18
+ import arxiv
19
+
20
+ ARXIV_AVAILABLE = True
21
+ except ImportError:
22
+ arxiv = None
23
+ ARXIV_AVAILABLE = False
24
+
25
+
26
@register_toolkit("arxiv")
class ArxivToolkit(AsyncBaseToolkit):
    """
    Toolkit for searching and downloading academic papers from arXiv.

    This toolkit provides access to the arXiv API for searching academic papers
    by various criteria including title, author, abstract, and date ranges.
    It also supports downloading PDFs of papers.

    Features:
    - Advanced search with filtering and operators
    - Paper metadata extraction
    - PDF download capabilities
    - Configurable result limits
    - Sort by relevance or other criteria

    Required dependency: arxiv
    Install with: pip install arxiv
    """

    def __init__(self, config: Optional[ToolkitConfig] = None):
        """
        Initialize the ArXiv toolkit.

        Args:
            config: Toolkit configuration

        Raises:
            ImportError: If arxiv package is not installed
        """
        super().__init__(config)

        if not ARXIV_AVAILABLE:
            raise ImportError("arxiv package is required for ArxivToolkit. " "Install with: pip install arxiv")

        # Initialize arXiv client
        self.client = arxiv.Client()

        # Configuration defaults, overridable via the toolkit config dict.
        self.default_max_results = self.config.config.get("default_max_results", 5)
        self.default_sort_by = self.config.config.get("default_sort_by", "Relevance")
        self.default_download_dir = self.config.config.get("default_download_dir", "./arxiv_papers")

    def _get_search_results(
        self,
        query: str,
        paper_ids: Optional[List[str]] = None,
        max_results: Optional[int] = None,
        sort_by: Optional[str] = None,
    ) -> Generator:
        """
        Get search results from arXiv API.

        Args:
            query: Search query string
            paper_ids: List of specific arXiv paper IDs
            max_results: Maximum number of results
            sort_by: Sort criterion (Relevance, LastUpdatedDate, SubmittedDate)

        Returns:
            Generator of arxiv.Result objects
        """
        paper_ids = paper_ids or []
        max_results = max_results or self.default_max_results

        # Map string sort criteria onto the arxiv library's enum; unknown
        # values silently fall back to relevance ordering.
        sort_mapping = {
            "Relevance": arxiv.SortCriterion.Relevance,
            "LastUpdatedDate": arxiv.SortCriterion.LastUpdatedDate,
            "SubmittedDate": arxiv.SortCriterion.SubmittedDate,
        }

        sort_criterion = sort_mapping.get(sort_by or self.default_sort_by, arxiv.SortCriterion.Relevance)

        search_query = arxiv.Search(
            query=query,
            id_list=paper_ids,
            max_results=max_results,
            sort_by=sort_criterion,
        )

        return self.client.results(search_query)

    async def search_papers(
        self,
        query: str,
        paper_ids: Optional[List[str]] = None,
        max_results: Optional[int] = None,
        sort_by: Optional[str] = None,
    ) -> List[Dict[str, Any]]:
        """
        Search for academic papers on arXiv using a query string and optional paper IDs.

        This tool provides comprehensive search capabilities for arXiv papers with
        advanced query syntax support and flexible filtering options.

        Advanced Query Syntax:
        - Field filtering: ti: (title), au: (author), abs: (abstract), all: (all fields)
        - Boolean operators: AND, OR, ANDNOT
        - Date ranges: submittedDate:[YYYYMMDDTTTT TO YYYYMMDDTTTT]
        - Categories: cat:cs.AI (computer science - artificial intelligence)

        Examples:
        - "au:LeCun AND ti:neural" - Papers by LeCun with "neural" in title
        - "abs:transformer AND cat:cs.CL" - Papers about transformers in computational linguistics
        - "submittedDate:[20230101 TO 20240101]" - Papers from 2023

        Args:
            query: The search query string with optional advanced syntax
            paper_ids: List of specific arXiv paper IDs to search for
            max_results: Maximum number of search results to return (default: 5)
            sort_by: Sort criterion - "Relevance", "LastUpdatedDate", or "SubmittedDate"

        Returns:
            List of dictionaries containing paper information:
            - title: Paper title
            - published_date: Publication date (ISO format)
            - authors: List of author names
            - entry_id: arXiv entry ID
            - summary: Paper abstract/summary
            - pdf_url: Direct PDF download URL
            - categories: arXiv categories
            - doi: DOI if available

        Raises:
            Exception: Re-raises any error from the underlying arXiv API call
                after logging it.
        """
        self.logger.info(f"Searching arXiv for: {query}")

        try:
            search_results = self._get_search_results(query, paper_ids, max_results, sort_by)
            papers_data = []

            for paper in search_results:
                # Extract author names
                authors = [author.name for author in paper.authors]

                # Extract categories (copy so the dict owns its own list)
                categories = list(paper.categories)

                paper_info = {
                    "title": paper.title.strip(),
                    # NOTE: uses the last-updated date, not the original
                    # submission date, as "published_date".
                    "published_date": paper.updated.date().isoformat(),
                    "authors": authors,
                    "entry_id": paper.entry_id,
                    "summary": paper.summary.strip(),
                    "pdf_url": paper.pdf_url,
                    "categories": categories,
                    "doi": paper.doi,
                    "journal_ref": paper.journal_ref,
                    "comment": paper.comment,
                }

                papers_data.append(paper_info)

            self.logger.info(f"Found {len(papers_data)} papers")
            return papers_data

        except Exception as e:
            self.logger.error(f"arXiv search failed: {e}")
            raise

    async def download_papers(
        self,
        query: str,
        paper_ids: Optional[List[str]] = None,
        max_results: Optional[int] = None,
        output_dir: Optional[str] = None,
    ) -> str:
        """
        Download PDFs of academic papers from arXiv based on the provided query.

        This tool searches for papers using the specified query and downloads
        their PDF files to the specified directory. Files are saved with
        sanitized titles as filenames.

        Args:
            query: The search query string (supports advanced syntax)
            paper_ids: List of specific arXiv paper IDs to download
            max_results: Maximum number of papers to download (default: 5)
            output_dir: Directory to save downloaded PDFs (default: ./arxiv_papers)

        Returns:
            Status message indicating success or failure with details
        """
        output_dir = output_dir or self.default_download_dir
        max_results = max_results or self.default_max_results

        self.logger.info(f"Downloading papers for query: {query}")
        self.logger.info(f"Output directory: {output_dir}")

        try:
            import os
            import re

            # Create output directory if it doesn't exist
            os.makedirs(output_dir, exist_ok=True)

            search_results = self._get_search_results(query, paper_ids, max_results)
            downloaded_count = 0
            failed_downloads = []

            for paper in search_results:
                try:
                    # Sanitize filename: drop non-word chars, collapse
                    # whitespace/hyphens, and cap the length.
                    safe_title = re.sub(r"[^\w\s-]", "", paper.title)
                    safe_title = re.sub(r"[-\s]+", "-", safe_title)
                    filename = f"{safe_title[:100]}.pdf"  # Limit filename length

                    # Download the paper
                    paper.download_pdf(dirpath=output_dir, filename=filename)
                    downloaded_count += 1

                    # Fixed: log the actual saved filename (was a constant
                    # placeholder string).
                    self.logger.info(f"Downloaded: {filename}")

                except Exception as e:
                    # Best-effort per paper: record the failure and continue
                    # with the remaining results.
                    error_msg = f"Failed to download '{paper.title}': {str(e)}"
                    failed_downloads.append(error_msg)
                    self.logger.warning(error_msg)

            # Prepare result message
            result_msg = f"Successfully downloaded {downloaded_count} papers to {output_dir}"

            if failed_downloads:
                result_msg += f"\n\nFailed downloads ({len(failed_downloads)}):\n"
                result_msg += "\n".join(failed_downloads)

            return result_msg

        except Exception as e:
            error_msg = f"Download operation failed: {str(e)}"
            self.logger.error(error_msg)
            return error_msg

    async def get_paper_details(self, paper_id: str) -> Dict[str, Any]:
        """
        Get detailed information about a specific arXiv paper by ID.

        Args:
            paper_id: arXiv paper ID (e.g., "2301.07041" or "arxiv:2301.07041")

        Returns:
            Dictionary containing detailed paper information, or a dictionary
            with a single "error" key if the paper is not found or the lookup
            fails.
        """
        self.logger.info(f"Getting details for paper: {paper_id}")

        try:
            # Clean paper ID (remove arxiv: prefix if present)
            clean_id = paper_id.replace("arxiv:", "")

            search_results = self._get_search_results("", paper_ids=[clean_id], max_results=1)

            # The generator yields at most one result; return on the first.
            for paper in search_results:
                return {
                    "title": paper.title.strip(),
                    "authors": [author.name for author in paper.authors],
                    "published_date": paper.published.isoformat() if paper.published else None,
                    "updated_date": paper.updated.isoformat() if paper.updated else None,
                    "entry_id": paper.entry_id,
                    "summary": paper.summary.strip(),
                    "pdf_url": paper.pdf_url,
                    "categories": list(paper.categories),
                    "primary_category": paper.primary_category,
                    "doi": paper.doi,
                    "journal_ref": paper.journal_ref,
                    "comment": paper.comment,
                    "links": [{"href": link.href, "title": link.title} for link in paper.links],
                }

            return {"error": f"Paper with ID '{paper_id}' not found"}

        except Exception as e:
            error_msg = f"Failed to get paper details: {str(e)}"
            self.logger.error(error_msg)
            return {"error": error_msg}

    async def get_tools_map(self) -> Dict[str, Callable]:
        """
        Get the mapping of tool names to their implementation functions.

        Returns:
            Dictionary mapping tool names to callable functions
        """
        return {
            "search_papers": self.search_papers,
            "download_papers": self.download_papers,
            "get_paper_details": self.get_paper_details,
        }