noesium 0.1.0__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60):
  1. noesium/agents/askura_agent/__init__.py +22 -0
  2. noesium/agents/askura_agent/askura_agent.py +480 -0
  3. noesium/agents/askura_agent/conversation.py +164 -0
  4. noesium/agents/askura_agent/extractor.py +175 -0
  5. noesium/agents/askura_agent/memory.py +14 -0
  6. noesium/agents/askura_agent/models.py +239 -0
  7. noesium/agents/askura_agent/prompts.py +202 -0
  8. noesium/agents/askura_agent/reflection.py +234 -0
  9. noesium/agents/askura_agent/summarizer.py +30 -0
  10. noesium/agents/askura_agent/utils.py +6 -0
  11. noesium/agents/deep_research/__init__.py +13 -0
  12. noesium/agents/deep_research/agent.py +398 -0
  13. noesium/agents/deep_research/prompts.py +84 -0
  14. noesium/agents/deep_research/schemas.py +42 -0
  15. noesium/agents/deep_research/state.py +54 -0
  16. noesium/agents/search/__init__.py +5 -0
  17. noesium/agents/search/agent.py +474 -0
  18. noesium/agents/search/state.py +28 -0
  19. noesium/core/__init__.py +1 -1
  20. noesium/core/agent/base.py +10 -2
  21. noesium/core/goalith/decomposer/llm_decomposer.py +1 -1
  22. noesium/core/llm/__init__.py +1 -1
  23. noesium/core/llm/base.py +2 -2
  24. noesium/core/llm/litellm.py +42 -21
  25. noesium/core/llm/llamacpp.py +25 -4
  26. noesium/core/llm/ollama.py +43 -22
  27. noesium/core/llm/openai.py +25 -5
  28. noesium/core/llm/openrouter.py +1 -1
  29. noesium/core/toolify/base.py +9 -2
  30. noesium/core/toolify/config.py +2 -2
  31. noesium/core/toolify/registry.py +21 -5
  32. noesium/core/tracing/opik_tracing.py +7 -7
  33. noesium/core/vector_store/__init__.py +2 -2
  34. noesium/core/vector_store/base.py +1 -1
  35. noesium/core/vector_store/pgvector.py +10 -13
  36. noesium/core/vector_store/weaviate.py +2 -1
  37. noesium/toolkits/__init__.py +1 -0
  38. noesium/toolkits/arxiv_toolkit.py +310 -0
  39. noesium/toolkits/audio_aliyun_toolkit.py +441 -0
  40. noesium/toolkits/audio_toolkit.py +370 -0
  41. noesium/toolkits/bash_toolkit.py +332 -0
  42. noesium/toolkits/document_toolkit.py +454 -0
  43. noesium/toolkits/file_edit_toolkit.py +552 -0
  44. noesium/toolkits/github_toolkit.py +395 -0
  45. noesium/toolkits/gmail_toolkit.py +575 -0
  46. noesium/toolkits/image_toolkit.py +425 -0
  47. noesium/toolkits/memory_toolkit.py +398 -0
  48. noesium/toolkits/python_executor_toolkit.py +334 -0
  49. noesium/toolkits/search_toolkit.py +451 -0
  50. noesium/toolkits/serper_toolkit.py +623 -0
  51. noesium/toolkits/tabular_data_toolkit.py +537 -0
  52. noesium/toolkits/user_interaction_toolkit.py +365 -0
  53. noesium/toolkits/video_toolkit.py +168 -0
  54. noesium/toolkits/wikipedia_toolkit.py +420 -0
  55. noesium-0.2.1.dist-info/METADATA +253 -0
  56. {noesium-0.1.0.dist-info → noesium-0.2.1.dist-info}/RECORD +59 -23
  57. {noesium-0.1.0.dist-info → noesium-0.2.1.dist-info}/licenses/LICENSE +1 -1
  58. noesium-0.1.0.dist-info/METADATA +0 -525
  59. {noesium-0.1.0.dist-info → noesium-0.2.1.dist-info}/WHEEL +0 -0
  60. {noesium-0.1.0.dist-info → noesium-0.2.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,441 @@
1
+ """
2
+ Audio processing toolkit using Aliyun NLS (Natural Language Service) for transcription.
3
+
4
+ Provides tools for audio transcription using Aliyun's Lingjie AI service and
5
+ audio content analysis using LLMs. This toolkit migrates the functionality
6
+ from the smartvoice module to the toolify framework.
7
+ """
8
+
9
+ import asyncio
10
+ import json
11
+ import os
12
+ from typing import Any, Callable, Dict, Optional
13
+
14
+ try:
15
+ from aliyunsdkcore.acs_exception.exceptions import ClientException, ServerException
16
+ from aliyunsdkcore.client import AcsClient
17
+ from aliyunsdkcore.request import CommonRequest
18
+
19
+ ALIYUN_AVAILABLE = True
20
+ except ImportError:
21
+ ClientException = None
22
+ ServerException = None
23
+ AcsClient = None
24
+ CommonRequest = None
25
+ ALIYUN_AVAILABLE = False
26
+
27
+ from noesium.core.toolify.base import AsyncBaseToolkit
28
+ from noesium.core.toolify.config import ToolkitConfig
29
+ from noesium.core.toolify.registry import register_toolkit
30
+ from noesium.core.utils.logging import get_logger
31
+
32
+ logger = get_logger(__name__)
33
+
34
+
35
@register_toolkit("audio_aliyun")
class AudioAliyunToolkit(AsyncBaseToolkit):
    """
    Toolkit for audio processing and analysis using Aliyun NLS service.

    This toolkit provides capabilities for:
    - Audio transcription using Aliyun's Lingjie AI service
    - Audio content analysis and Q&A using LLMs
    - Async/await support for better performance

    Features:
    - Direct transcription from publicly accessible audio URLs
    - LLM-powered audio content analysis
    - Optimized for Chinese language content

    Required configuration:
    - Aliyun Access Key ID and Secret
    - NLS App Key
    - LLM configuration for analysis

    Note: Audio files must be publicly accessible URLs for Aliyun NLS service.
    """

    # Aliyun NLS file-transcription service constants
    PRODUCT = "nls-filetrans"
    API_VERSION = "2018-08-17"
    POST_REQUEST_ACTION = "SubmitTask"
    GET_REQUEST_ACTION = "GetTaskResult"

    # Request parameter names
    KEY_APP_KEY = "appkey"
    KEY_FILE_LINK = "file_link"
    KEY_VERSION = "version"
    KEY_ENABLE_WORDS = "enable_words"
    KEY_AUTO_SPLIT = "auto_split"

    # Response field names
    KEY_TASK = "Task"
    KEY_TASK_ID = "TaskId"
    KEY_STATUS_TEXT = "StatusText"
    KEY_RESULT = "Result"

    # Task status values
    STATUS_SUCCESS = "SUCCESS"
    STATUS_RUNNING = "RUNNING"
    STATUS_QUEUEING = "QUEUEING"

    def __init__(self, config: Optional[ToolkitConfig] = None):
        """
        Initialize the Aliyun audio toolkit.

        Credentials are read from the toolkit config first and fall back to the
        environment variables of the same name. The region is read from the
        config only and defaults to "cn-shanghai".

        Args:
            config: Toolkit configuration containing API keys and settings

        Raises:
            ImportError: If the Aliyun SDK packages are not installed.
            ValueError: If any of the access key ID, access key secret or
                NLS app key is missing.
        """
        if not ALIYUN_AVAILABLE:
            raise ImportError("Aliyun packages are not installed. Install them with: pip install 'noesium[aliyun]'")

        super().__init__(config)

        settings = self.config.config

        # Aliyun credentials
        self.ak_id = settings.get("ALIYUN_ACCESS_KEY_ID") or os.getenv("ALIYUN_ACCESS_KEY_ID")
        self.ak_secret = settings.get("ALIYUN_ACCESS_KEY_SECRET") or os.getenv("ALIYUN_ACCESS_KEY_SECRET")
        self.app_key = settings.get("ALIYUN_NLS_APP_KEY") or os.getenv("ALIYUN_NLS_APP_KEY")
        self.region_id = settings.get("ALIYUN_REGION_ID", "cn-shanghai")

        if not all([self.ak_id, self.ak_secret, self.app_key]):
            raise ValueError(
                "Aliyun credentials not found. Please set ALIYUN_ACCESS_KEY_ID, "
                "ALIYUN_ACCESS_KEY_SECRET, and ALIYUN_NLS_APP_KEY in config or environment"
            )

        # The endpoint depends on the configured region, so it stays per-instance.
        self.DOMAIN = f"filetrans.{self.region_id}.aliyuncs.com"

        # Create AcsClient instance
        self.client = AcsClient(self.ak_id, self.ak_secret, self.region_id)

    def _build_request(self, action: str, method: str) -> Any:
        """Create a CommonRequest addressed to the file-transcription endpoint."""
        request = CommonRequest()
        request.set_domain(self.DOMAIN)
        request.set_version(self.API_VERSION)
        request.set_product(self.PRODUCT)
        request.set_action_name(action)
        request.set_method(method)
        return request

    async def _transcribe_file_aliyun(
        self,
        file_link: str,
        poll_interval: float = 10.0,
        max_attempts: int = 60,
    ) -> Optional[Dict[str, Any]]:
        """
        Perform file transcription using Aliyun NLS service.

        Submits a transcription task for ``file_link`` and then polls for the
        result until the task leaves the RUNNING/QUEUEING states or the attempt
        budget is exhausted. The blocking SDK calls run in the default executor
        so the event loop is not blocked.

        Args:
            file_link: URL of the audio file to transcribe
            poll_interval: Seconds to wait between polls (default 10)
            max_attempts: Maximum number of polls (default 60, i.e. about
                10 minutes with the default interval)

        Returns:
            Transcription result dictionary or None if failed
        """
        loop = asyncio.get_running_loop()

        # Submit transcription request
        post_request = self._build_request(self.POST_REQUEST_ACTION, "POST")

        # Use version 4.0 for new integrations, set enable_words to False by default
        task = {
            self.KEY_APP_KEY: self.app_key,
            self.KEY_FILE_LINK: file_link,
            self.KEY_VERSION: "4.0",
            self.KEY_ENABLE_WORDS: False,
        }
        post_request.add_body_params(self.KEY_TASK, json.dumps(task))

        # Log only the file link: the task payload carries the app key, which
        # this toolkit treats as a credential.
        self.logger.info(f"Submitting transcription task for: {file_link}")

        try:
            post_response = await loop.run_in_executor(None, self.client.do_action_with_exception, post_request)
            post_response_json = json.loads(post_response)
        except ServerException as e:
            self.logger.error(f"Server error: {e}")
            return None
        except ClientException as e:
            self.logger.error(f"Client error: {e}")
            return None
        except ValueError as e:
            # json.JSONDecodeError is a ValueError subclass
            self.logger.error(f"Invalid submit response: {e}")
            return None

        self.logger.info(f"Submit response: {post_response_json}")

        status_text = post_response_json.get(self.KEY_STATUS_TEXT)
        if status_text != self.STATUS_SUCCESS:
            self.logger.error(f"File transcription request failed: {status_text}")
            return None
        self.logger.info("File transcription request submitted successfully!")

        task_id = post_response_json.get(self.KEY_TASK_ID)
        if not task_id:
            self.logger.error("No task ID received")
            return None

        # Create request to get task result
        get_request = self._build_request(self.GET_REQUEST_ACTION, "GET")
        get_request.add_query_param(self.KEY_TASK_ID, task_id)

        # Poll for results
        self.logger.info(f"Polling for results with task ID: {task_id}")
        pending_statuses = (self.STATUS_RUNNING, self.STATUS_QUEUEING)
        get_response_json: Dict[str, Any] = {}

        for attempt in range(1, max_attempts + 1):
            try:
                get_response = await loop.run_in_executor(None, self.client.do_action_with_exception, get_request)
                get_response_json = json.loads(get_response)
            except ServerException as e:
                self.logger.error(f"Server error during polling: {e}")
                return None
            except ClientException as e:
                self.logger.error(f"Client error during polling: {e}")
                return None
            except ValueError as e:
                self.logger.error(f"Invalid poll response: {e}")
                return None

            self.logger.info(f"Poll response (attempt {attempt}): {get_response_json}")

            status_text = get_response_json.get(self.KEY_STATUS_TEXT)
            if status_text not in pending_statuses:
                # Task reached a terminal state
                break

            # No point sleeping after the final poll
            if attempt < max_attempts:
                await asyncio.sleep(poll_interval)
        else:
            self.logger.error(
                f"File transcription timed out after {max_attempts} polling attempts (task ID: {task_id})"
            )
            return None

        if status_text == self.STATUS_SUCCESS:
            self.logger.info("File transcription completed successfully!")
            return get_response_json.get(self.KEY_RESULT)

        self.logger.error(f"File transcription failed with status: {status_text}")
        return None

    def _extract_transcription_text(self, result: Dict[str, Any]) -> Optional[str]:
        """
        Extract transcription text from a transcription result.

        When ``result`` has a "Sentences" list, the non-empty "Text" of each
        sentence is joined with spaces. Repeated texts (the same sentence may
        appear once per ChannelId) are kept only once, in the order they first
        appear, so the transcript keeps its original sentence order.

        Otherwise the first of the keys "text", "transcription", "content",
        "result" found in the dict is returned as a string; failing that the
        whole dict is serialized as JSON. A plain string is returned as-is.

        Args:
            result: The result from the transcription call

        Returns:
            Extracted transcription text or None if extraction fails
        """
        try:
            if isinstance(result, dict):
                sentences = result.get("Sentences")
                if isinstance(sentences, list):
                    # dict keys give order-preserving de-duplication
                    ordered_texts: Dict[str, None] = {}
                    for sentence in sentences:
                        if isinstance(sentence, dict) and "Text" in sentence:
                            text = sentence["Text"].strip()
                            if text:  # Only keep non-empty text
                                ordered_texts.setdefault(text, None)

                    if ordered_texts:
                        return " ".join(ordered_texts)

                # If the structure is different, look for common result keys
                for key in ("text", "transcription", "content", "result"):
                    if key in result:
                        return str(result[key])

                # Last resort: hand back the raw structure as JSON
                return json.dumps(result, ensure_ascii=False)

            # If result is already a string, return it
            if isinstance(result, str):
                return result

        except (AttributeError, TypeError, ValueError) as e:
            self.logger.error("Error extracting transcription text: %s", str(e))
            return None

        return None

    async def _transcribe_audio_aliyun(self, md5_hash: str) -> Dict:
        """
        Transcribe a previously registered audio file, using an on-disk cache.

        NOTE(review): this method relies on ``self.cache_dir`` and
        ``self.md5_to_path``, which this class's ``__init__`` does not set.
        They are treated as optional here: without ``cache_dir`` the cache is
        skipped, and without ``md5_to_path`` every hash is reported as unknown.
        Confirm whether a base class or caller is expected to provide them.

        Args:
            md5_hash: MD5 hash of the audio file

        Returns:
            Transcription result with text and metadata

        Raises:
            ValueError: If ``md5_hash`` has no known file path.
            RuntimeError: If transcription or text extraction fails.
        """
        cache_dir = getattr(self, "cache_dir", None)
        md5_to_path = getattr(self, "md5_to_path", None) or {}
        cache_file = cache_dir / f"{md5_hash}.json" if cache_dir is not None else None

        # Check cache first; it is written as UTF-8, so read it the same way
        if cache_file is not None and cache_file.exists():
            with open(cache_file, "r", encoding="utf-8") as f:
                return json.load(f)

        # Get file path
        if md5_hash not in md5_to_path:
            raise ValueError(f"Audio file with MD5 {md5_hash} not found in cache")

        file_path = md5_to_path[md5_hash]

        try:
            self.logger.info(f"Transcribing audio file with Aliyun NLS: {file_path}")

            # Aliyun NLS needs a URL to the file. The path is passed through
            # unchanged, so it must already be reachable as a URL; a local file
            # would have to be uploaded (e.g. to OSS) first.
            aliyun_result = await self._transcribe_file_aliyun(file_path)
            if aliyun_result is None:
                raise RuntimeError("Aliyun NLS transcription failed")

            # Extract text from Aliyun result
            transcription_text = self._extract_transcription_text(aliyun_result)
            if transcription_text is None:
                raise RuntimeError("Failed to extract text from Aliyun NLS result")

            # Create standardized result format
            result = {
                "text": transcription_text,
                "language": "zh",  # Aliyun NLS primarily supports Chinese
                "aliyun_result": aliyun_result,  # Keep original result for reference
                "provider": "aliyun_nls",
                "duration": None,  # Aliyun NLS doesn't provide duration in the same format
            }

            # Cache the result
            if cache_file is not None:
                with open(cache_file, "w", encoding="utf-8") as f:
                    json.dump(result, f, indent=2, ensure_ascii=False)

            self.logger.info("Aliyun NLS transcription completed")
            return result

        except Exception as e:
            self.logger.error(f"Aliyun NLS transcription failed: {e}")
            raise

    async def transcribe_audio(self, audio_path: str) -> Dict:
        """
        Transcribe an audio file to text using Aliyun NLS service.

        This tool converts speech in audio files to text using Aliyun's Lingjie AI service.
        Note: For Aliyun NLS, the audio_path should be a publicly accessible URL.

        Args:
            audio_path: URL of the audio file to transcribe (must be publicly accessible)

        Returns:
            Dictionary containing:
            - text: The transcribed text
            - aliyun_result: Original result from Aliyun NLS for reference
            - provider: "aliyun_nls" to indicate the service used

            On failure the dictionary instead contains an "error" message and
            an empty "text".

        Example:
            result = await transcribe_audio("https://example.com/audio.mp3")
            print(result["text"])  # Full transcription
        """
        try:
            # First, perform the transcription using Aliyun NLS
            aliyun_result = await self._transcribe_file_aliyun(audio_path)
            if aliyun_result is None:
                return {"error": "Aliyun NLS transcription failed", "text": ""}

            # Then extract the text from the result
            transcription_text = self._extract_transcription_text(aliyun_result)
            if transcription_text is None:
                return {"error": "Failed to extract text from Aliyun NLS result", "text": ""}

            return {"text": transcription_text, "aliyun_result": aliyun_result, "provider": "aliyun_nls"}

        except Exception as e:
            # Tool boundary: report the failure to the caller instead of raising
            error_msg = f"Aliyun audio transcription failed: {str(e)}"
            self.logger.error(error_msg)
            return {"error": error_msg, "text": ""}

    async def audio_qa(self, audio_path: str, question: str) -> str:
        """
        Ask questions about audio content using Aliyun NLS transcription.

        This tool transcribes audio content using Aliyun NLS and then uses an LLM to answer
        questions about the audio based on the transcription. It's particularly effective
        for Chinese audio content.

        Args:
            audio_path: URL of the audio file to transcribe (must be publicly accessible)
            question: Question to ask about the audio content

        Returns:
            Answer to the question based on the audio content, or a message
            describing why no answer could be produced
        """
        self.logger.info(f"Processing Aliyun audio Q&A for: {audio_path}")
        self.logger.info(f"Question: {question}")

        try:
            # Transcribe the audio using Aliyun NLS
            transcription_result = await self.transcribe_audio(audio_path)

            if "error" in transcription_result:
                return f"Failed to transcribe audio: {transcription_result['error']}"

            transcription_text = transcription_result.get("text", "")

            if not transcription_text.strip():
                return "No speech detected in the audio file."

            # Prepare prompt for LLM analysis (prompt text is intentionally in Chinese)
            prompt = f"""基于以下音频转录内容,请回答问题。

音频文件: {audio_path}
转录服务: 阿里云语音识别 (Aliyun NLS)
转录内容:
{transcription_text}

问题: {question}

请基于上述音频内容提供清晰、详细的答案。如果转录内容不足以回答问题,请明确说明。"""

            # Use LLM to analyze and answer
            response = await self.llm_client.completion(
                messages=[
                    {
                        "role": "system",
                        "content": "你是一个专门分析音频内容的助手。请基于提供的转录内容提供清晰、准确的答案。",
                    },
                    {"role": "user", "content": prompt},
                ],
                temperature=0.1,
                max_tokens=1000,
            )

            return response.strip()

        except Exception as e:
            # Tool boundary: report the failure as text instead of raising
            error_msg = f"Aliyun audio Q&A failed: {str(e)}"
            self.logger.error(error_msg)
            return error_msg

    async def get_tools_map(self) -> Dict[str, Callable]:
        """
        Get the mapping of tool names to their implementation functions.

        Returns:
            Dictionary mapping tool names to callable functions
        """
        return {
            "transcribe_audio": self.transcribe_audio,
            "audio_qa": self.audio_qa,
        }