noesium-0.1.0-py3-none-any.whl → noesium-0.2.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. noesium/agents/askura_agent/__init__.py +22 -0
  2. noesium/agents/askura_agent/askura_agent.py +480 -0
  3. noesium/agents/askura_agent/conversation.py +164 -0
  4. noesium/agents/askura_agent/extractor.py +175 -0
  5. noesium/agents/askura_agent/memory.py +14 -0
  6. noesium/agents/askura_agent/models.py +239 -0
  7. noesium/agents/askura_agent/prompts.py +202 -0
  8. noesium/agents/askura_agent/reflection.py +234 -0
  9. noesium/agents/askura_agent/summarizer.py +30 -0
  10. noesium/agents/askura_agent/utils.py +6 -0
  11. noesium/agents/deep_research/__init__.py +13 -0
  12. noesium/agents/deep_research/agent.py +398 -0
  13. noesium/agents/deep_research/prompts.py +84 -0
  14. noesium/agents/deep_research/schemas.py +42 -0
  15. noesium/agents/deep_research/state.py +54 -0
  16. noesium/agents/search/__init__.py +5 -0
  17. noesium/agents/search/agent.py +474 -0
  18. noesium/agents/search/state.py +28 -0
  19. noesium/core/__init__.py +1 -1
  20. noesium/core/agent/base.py +10 -2
  21. noesium/core/goalith/decomposer/llm_decomposer.py +1 -1
  22. noesium/core/llm/__init__.py +1 -1
  23. noesium/core/llm/base.py +2 -2
  24. noesium/core/llm/litellm.py +42 -21
  25. noesium/core/llm/llamacpp.py +25 -4
  26. noesium/core/llm/ollama.py +43 -22
  27. noesium/core/llm/openai.py +25 -5
  28. noesium/core/llm/openrouter.py +1 -1
  29. noesium/core/toolify/base.py +9 -2
  30. noesium/core/toolify/config.py +2 -2
  31. noesium/core/toolify/registry.py +21 -5
  32. noesium/core/tracing/opik_tracing.py +7 -7
  33. noesium/core/vector_store/__init__.py +2 -2
  34. noesium/core/vector_store/base.py +1 -1
  35. noesium/core/vector_store/pgvector.py +10 -13
  36. noesium/core/vector_store/weaviate.py +2 -1
  37. noesium/toolkits/__init__.py +1 -0
  38. noesium/toolkits/arxiv_toolkit.py +310 -0
  39. noesium/toolkits/audio_aliyun_toolkit.py +441 -0
  40. noesium/toolkits/audio_toolkit.py +370 -0
  41. noesium/toolkits/bash_toolkit.py +332 -0
  42. noesium/toolkits/document_toolkit.py +454 -0
  43. noesium/toolkits/file_edit_toolkit.py +552 -0
  44. noesium/toolkits/github_toolkit.py +395 -0
  45. noesium/toolkits/gmail_toolkit.py +575 -0
  46. noesium/toolkits/image_toolkit.py +425 -0
  47. noesium/toolkits/memory_toolkit.py +398 -0
  48. noesium/toolkits/python_executor_toolkit.py +334 -0
  49. noesium/toolkits/search_toolkit.py +451 -0
  50. noesium/toolkits/serper_toolkit.py +623 -0
  51. noesium/toolkits/tabular_data_toolkit.py +537 -0
  52. noesium/toolkits/user_interaction_toolkit.py +365 -0
  53. noesium/toolkits/video_toolkit.py +168 -0
  54. noesium/toolkits/wikipedia_toolkit.py +420 -0
  55. noesium-0.2.1.dist-info/METADATA +253 -0
  56. {noesium-0.1.0.dist-info → noesium-0.2.1.dist-info}/RECORD +59 -23
  57. {noesium-0.1.0.dist-info → noesium-0.2.1.dist-info}/licenses/LICENSE +1 -1
  58. noesium-0.1.0.dist-info/METADATA +0 -525
  59. {noesium-0.1.0.dist-info → noesium-0.2.1.dist-info}/WHEEL +0 -0
  60. {noesium-0.1.0.dist-info → noesium-0.2.1.dist-info}/top_level.txt +0 -0
noesium/toolkits/audio_toolkit.py
@@ -0,0 +1,370 @@
+"""
+Audio processing toolkit for transcription and analysis.
+
+Provides tools for audio transcription using OpenAI's Whisper API and
+audio content analysis using LLMs.
+"""
+
+import hashlib
+import os
+from pathlib import Path
+from typing import Callable, Dict
+from urllib.parse import urlparse
+
+import aiohttp
+
+from noesium.core.toolify.base import AsyncBaseToolkit
+from noesium.core.toolify.config import ToolkitConfig
+from noesium.core.toolify.registry import register_toolkit
+from noesium.core.utils.logging import get_logger
+
+logger = get_logger(__name__)
+
+
+@register_toolkit("audio")
+class AudioToolkit(AsyncBaseToolkit):
+    """
+    Toolkit for audio processing and analysis.
+
+    This toolkit provides capabilities for:
+    - Audio transcription using OpenAI's Whisper API
+    - Audio content analysis and Q&A
+    - Support for various audio formats
+    - URL and local file processing
+    - Caching of transcription results
+
+    Features:
+    - Automatic audio file downloading from URLs
+    - MD5-based caching to avoid re-transcribing same files
+    - Detailed transcription with timestamps
+    - LLM-powered audio content analysis
+    - Support for multiple audio formats (mp3, wav, m4a, etc.)
+
+    Required configuration:
+    - OpenAI API key for transcription
+    - LLM configuration for analysis
+    """
+
+    def __init__(self, config: ToolkitConfig = None):
+        """
+        Initialize the audio toolkit.
+
+        Args:
+            config: Toolkit configuration containing API keys and settings
+        """
+        super().__init__(config)
+
+        # Configuration
+        self.audio_model = self.config.config.get("audio_model", "whisper-1")
+        self.cache_dir = Path(self.config.config.get("cache_dir", "./audio_cache"))
+        self.download_dir = Path(self.config.config.get("download_dir", "./audio_downloads"))
+
+        # Create directories
+        self.cache_dir.mkdir(parents=True, exist_ok=True)
+        self.download_dir.mkdir(parents=True, exist_ok=True)
+
+        # Cache for MD5 to file path mapping
+        self.md5_to_path = {}
+
+    def _get_file_md5(self, file_path: str) -> str:
+        """Calculate MD5 hash of a file."""
+        hash_md5 = hashlib.md5()
+        with open(file_path, "rb") as f:
+            for chunk in iter(lambda: f.read(4096), b""):
+                hash_md5.update(chunk)
+        return hash_md5.hexdigest()
+
+    def _is_url(self, path: str) -> bool:
+        """Check if the path is a URL."""
+        try:
+            result = urlparse(path)
+            return all([result.scheme, result.netloc])
+        except Exception:
+            return False
+
+    def _get_file_extension(self, path: str) -> str:
+        """Get file extension from path or URL."""
+        parsed = urlparse(path)
+        return Path(parsed.path).suffix or ".mp3"  # Default to .mp3
+
+    async def _download_audio(self, url: str, output_path: Path) -> Path:
+        """Download audio file from URL."""
+        self.logger.info(f"Downloading audio from: {url}")
+
+        try:
+            async with aiohttp.ClientSession() as session:
+                async with session.get(url) as response:
+                    response.raise_for_status()
+
+                    with open(output_path, "wb") as f:
+                        async for chunk in response.content.iter_chunked(8192):
+                            f.write(chunk)
+
+            self.logger.info(f"Audio downloaded to: {output_path}")
+            return output_path
+
+        except Exception as e:
+            self.logger.error(f"Failed to download audio: {e}")
+            raise
+
+    async def _handle_audio_path(self, audio_path: str) -> str:
+        """
+        Handle audio path - download if URL, calculate MD5, and cache.
+
+        Args:
+            audio_path: Path or URL to audio file
+
+        Returns:
+            MD5 hash of the audio file
+        """
+        if self._is_url(audio_path):
+            # Generate filename based on URL
+            ext = self._get_file_extension(audio_path)
+            url_hash = hashlib.md5(audio_path.encode()).hexdigest()[:8]
+            local_path = self.download_dir / f"{url_hash}{ext}"
+
+            # Download if not already cached
+            if not local_path.exists():
+                await self._download_audio(audio_path, local_path)
+
+            file_path = str(local_path)
+        else:
+            # Local file
+            if not os.path.exists(audio_path):
+                raise FileNotFoundError(f"Audio file not found: {audio_path}")
+            file_path = audio_path
+
+        # Calculate MD5 and cache the mapping
+        md5_hash = self._get_file_md5(file_path)
+        self.md5_to_path[md5_hash] = file_path
+
+        return md5_hash
+
+    async def _transcribe_audio(self, md5_hash: str) -> Dict:
+        """
+        Transcribe audio file using OpenAI's Whisper API.
+
+        Args:
+            md5_hash: MD5 hash of the audio file
+
+        Returns:
+            Transcription result with text and metadata
+        """
+        # Check cache first
+        cache_file = self.cache_dir / f"{md5_hash}.json"
+        if cache_file.exists():
+            import json
+
+            with open(cache_file, "r") as f:
+                return json.load(f)
+
+        # Get file path
+        if md5_hash not in self.md5_to_path:
+            raise ValueError(f"Audio file with MD5 {md5_hash} not found in cache")
+
+        file_path = self.md5_to_path[md5_hash]
+
+        try:
+            # Use the LLM client's OpenAI client for transcription
+            import openai
+
+            # Get OpenAI client from LLM client
+            client = self.llm_client._client if hasattr(self.llm_client, "_client") else None
+            if not client:
+                # Fallback: create OpenAI client directly
+                api_key = self.config.config.get("OPENAI_API_KEY") or os.getenv("OPENAI_API_KEY")
+                if not api_key:
+                    raise ValueError("OpenAI API key not found in config or environment")
+                client = openai.AsyncOpenAI(api_key=api_key)
+
+            self.logger.info(f"Transcribing audio file: {file_path}")
+
+            with open(file_path, "rb") as audio_file:
+                transcript = await client.audio.transcriptions.create(
+                    model=self.audio_model,
+                    file=audio_file,
+                    response_format="verbose_json",
+                    timestamp_granularities=["segment"] if self.audio_model != "whisper-1" else None,
+                )
+
+            # Convert to dict and cache
+            result = transcript.model_dump() if hasattr(transcript, "model_dump") else dict(transcript)
+
+            # Cache the result
+            import json
+
+            with open(cache_file, "w") as f:
+                json.dump(result, f, indent=2)
+
+            self.logger.info(f"Transcription completed, duration: {result.get('duration', 'unknown')}s")
+            return result
+
+        except Exception as e:
+            self.logger.error(f"Transcription failed: {e}")
+            raise
+
+    async def transcribe_audio(self, audio_path: str) -> Dict:
+        """
+        Transcribe an audio file to text.
+
+        This tool converts speech in audio files to text using OpenAI's Whisper API.
+        It supports various audio formats and can handle both local files and URLs.
+
+        Features:
+        - Supports multiple audio formats (mp3, wav, m4a, flac, etc.)
+        - Automatic downloading from URLs
+        - Caching to avoid re-transcribing the same files
+        - Detailed output with timestamps (when supported)
+        - Duration and language detection
+
+        Args:
+            audio_path: Path to local audio file or URL to audio file
+
+        Returns:
+            Dictionary containing:
+            - text: The transcribed text
+            - duration: Audio duration in seconds
+            - language: Detected language (if available)
+            - segments: Timestamped segments (if available)
+
+        Example:
+            result = await transcribe_audio("https://example.com/audio.mp3")
+            print(result["text"])  # Full transcription
+            for segment in result.get("segments", []):
+                print(f"{segment['start']:.2f}s: {segment['text']}")
+        """
+        try:
+            md5_hash = await self._handle_audio_path(audio_path)
+            result = await self._transcribe_audio(md5_hash)
+            return result
+
+        except Exception as e:
+            error_msg = f"Audio transcription failed: {str(e)}"
+            self.logger.error(error_msg)
+            return {"error": error_msg, "text": ""}
+
+    async def audio_qa(self, audio_path: str, question: str) -> str:
+        """
+        Ask questions about audio content.
+
+        This tool transcribes audio content and then uses an LLM to answer
+        questions about the audio based on the transcription. It's useful for
+        analyzing conversations, lectures, interviews, or any spoken content.
+
+        Use cases:
+        - Summarizing audio content
+        - Extracting key information from recordings
+        - Answering specific questions about audio content
+        - Analyzing sentiment or themes in audio
+
+        Args:
+            audio_path: Path to local audio file or URL to audio file
+            question: Question to ask about the audio content
+
+        Returns:
+            Answer to the question based on the audio content
+
+        Examples:
+            - "What are the main topics discussed in this meeting?"
+            - "Who are the speakers and what are their main points?"
+            - "Summarize the key decisions made in this recording"
+            - "What is the overall sentiment of this conversation?"
+        """
+        self.logger.info(f"Processing audio Q&A for: {audio_path}")
+        self.logger.info(f"Question: {question}")
+
+        try:
+            # Transcribe the audio
+            md5_hash = await self._handle_audio_path(audio_path)
+            transcription_result = await self._transcribe_audio(md5_hash)
+
+            if "error" in transcription_result:
+                return f"Failed to transcribe audio: {transcription_result['error']}"
+
+            transcription_text = transcription_result.get("text", "")
+            duration = transcription_result.get("duration", "unknown")
+
+            if not transcription_text.strip():
+                return "No speech detected in the audio file."
+
+            # Prepare prompt for LLM analysis
+            prompt = f"""Based on the following audio transcription, please answer the question.
+
+Audio File: {audio_path}
+Duration: {duration} seconds
+Transcription:
+{transcription_text}
+
+Question: {question}
+
+Please provide a clear, detailed answer based on the audio content above. If the transcription doesn't contain enough information to answer the question, please state that clearly."""
+
+            # Use LLM to analyze and answer
+            response = await self.llm_client.completion(
+                messages=[
+                    {
+                        "role": "system",
+                        "content": "You are a helpful assistant specializing in audio content analysis. Provide clear, accurate answers based on the provided transcription.",
+                    },
+                    {"role": "user", "content": prompt},
+                ],
+                temperature=0.1,
+                max_tokens=1000,
+            )
+
+            return response.strip()
+
+        except Exception as e:
+            error_msg = f"Audio Q&A failed: {str(e)}"
+            self.logger.error(error_msg)
+            return error_msg
+
+    async def get_audio_info(self, audio_path: str) -> Dict:
+        """
+        Get information about an audio file including transcription metadata.
+
+        Args:
+            audio_path: Path to local audio file or URL to audio file
+
+        Returns:
+            Dictionary with audio information and transcription metadata
+        """
+        try:
+            md5_hash = await self._handle_audio_path(audio_path)
+            file_path = self.md5_to_path[md5_hash]
+
+            # Get basic file info
+            file_stat = os.stat(file_path)
+            file_size = file_stat.st_size
+
+            # Get transcription info
+            transcription_result = await self._transcribe_audio(md5_hash)
+
+            return {
+                "file_path": audio_path,
+                "local_path": file_path,
+                "file_size_bytes": file_size,
+                "file_size_mb": round(file_size / (1024 * 1024), 2),
+                "md5_hash": md5_hash,
+                "duration_seconds": transcription_result.get("duration"),
+                "detected_language": transcription_result.get("language"),
+                "transcription_length": len(transcription_result.get("text", "")),
+                "has_segments": "segments" in transcription_result,
+                "segment_count": len(transcription_result.get("segments", [])),
+            }
+
+        except Exception as e:
+            return {"error": f"Failed to get audio info: {str(e)}"}
+
+    async def get_tools_map(self) -> Dict[str, Callable]:
+        """
+        Get the mapping of tool names to their implementation functions.
+
+        Returns:
+            Dictionary mapping tool names to callable functions
+        """
+        return {
+            "transcribe_audio": self.transcribe_audio,
+            "audio_qa": self.audio_qa,
+            "get_audio_info": self.get_audio_info,
+        }
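
A minimal usage sketch for the AudioToolkit added above. It assumes ToolkitConfig can be built from a plain config dict (the toolkit only reads self.config.config) and that an OpenAI API key is available via config or the OPENAI_API_KEY environment variable; the ToolkitConfig constructor and the base class's LLM-client wiring are not shown in this diff, so the construction lines are hypothetical.

    import asyncio

    from noesium.core.toolify.config import ToolkitConfig
    from noesium.toolkits.audio_toolkit import AudioToolkit

    async def main():
        # Hypothetical construction; ToolkitConfig's exact signature is not shown above.
        config = ToolkitConfig(config={"audio_model": "whisper-1", "cache_dir": "./audio_cache"})
        toolkit = AudioToolkit(config)

        # First call downloads (if a URL) and transcribes; results are cached by file MD5.
        result = await toolkit.transcribe_audio("https://example.com/audio.mp3")
        print(result.get("text", ""))

        # audio_qa reuses the cached transcription, then asks the toolkit's LLM client
        # (configured by AsyncBaseToolkit, not shown in this diff) to answer the question.
        answer = await toolkit.audio_qa("https://example.com/audio.mp3", "What topics are discussed?")
        print(answer)

    asyncio.run(main())
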
noesium/toolkits/bash_toolkit.py
@@ -0,0 +1,332 @@
+"""
+Bash command execution toolkit.
+
+Provides safe execution of bash commands in a persistent shell environment
+with comprehensive error handling and security features.
+"""
+
+import re
+from typing import Callable, Dict, Optional
+
+from noesium.core.toolify.base import AsyncBaseToolkit
+from noesium.core.toolify.config import ToolkitConfig
+from noesium.core.toolify.registry import register_toolkit
+from noesium.core.utils.logging import get_logger
+
+logger = get_logger(__name__)
+
+# ANSI escape sequence regex for cleaning output
+ANSI_ESCAPE = re.compile(r"\x1B\[[0-?]*[ -/]*[@-~]")
+
+
+@register_toolkit("bash")
+class BashToolkit(AsyncBaseToolkit):
+    """
+    Toolkit for executing bash commands in a persistent shell environment.
+
+    Features:
+    - Persistent shell session across commands
+    - Command filtering and security checks
+    - ANSI escape sequence cleaning
+    - Configurable workspace directory
+    - Timeout protection
+    - Automatic shell recovery on errors
+
+    Security features:
+    - Banned command filtering
+    - Workspace isolation
+    - Command validation
+
+    The toolkit maintains a persistent bash shell using pexpect, allowing
+    for stateful command execution where environment variables, directory
+    changes, and other shell state persist between commands.
+    """
+
+    def __init__(self, config: ToolkitConfig = None):
+        """
+        Initialize the bash toolkit.
+
+        Args:
+            config: Toolkit configuration
+        """
+        super().__init__(config)
+
+        # Configuration
+        self.workspace_root = self.config.config.get("workspace_root", "/tmp/noesium_workspace")
+        self.timeout = self.config.config.get("timeout", 60)
+        self.max_output_length = self.config.config.get("max_output_length", 10000)
+
+        # Security configuration
+        self.banned_commands = self.config.config.get(
+            "banned_commands",
+            [
+                "rm -rf /",
+                "rm -rf ~",
+                "rm -rf ./*",
+                "rm -rf *",
+                "mkfs",
+                "dd if=",
+                ":(){ :|:& };:",  # Fork bomb
+                "sudo rm",
+                "sudo dd",
+            ],
+        )
+
+        self.banned_command_patterns = self.config.config.get(
+            "banned_command_patterns",
+            [
+                r"git\s+init",
+                r"git\s+commit",
+                r"git\s+add",
+                r"rm\s+-rf\s+/",
+                r"sudo\s+rm\s+-rf",
+            ],
+        )
+
+        # Shell state
+        self.child = None
+        self.custom_prompt = None
+        self._shell_initialized = False
+
+    async def build(self):
+        """Initialize the persistent shell."""
+        await super().build()
+        if not self._shell_initialized:
+            self._initialize_shell()
+            self._setup_workspace()
+            self._shell_initialized = True
+
+    async def cleanup(self):
+        """Cleanup shell resources."""
+        if self.child:
+            try:
+                self.child.close()
+            except Exception as e:
+                self.logger.warning(f"Error closing shell: {e}")
+        self.child = None
+        self.custom_prompt = None
+        self._shell_initialized = False
+        await super().cleanup()
+
+    def _initialize_shell(self):
+        """Initialize a persistent bash shell with custom prompt."""
+        try:
+            import pexpect
+        except ImportError:
+            raise ImportError("pexpect is required for bash toolkit. Install with: pip install pexpect")
+
+        try:
+            # Start a new bash shell
+            self.child = pexpect.spawn("/bin/bash", encoding="utf-8", echo=False, timeout=self.timeout)
+
+            # Set up a unique prompt for reliable command detection
+            self.custom_prompt = "NOESIUM_BASH_PROMPT>> "
+
+            # Configure shell for better interaction
+            self.child.sendline("stty -onlcr")  # Disable automatic newline conversion
+            self.child.sendline("unset PROMPT_COMMAND")  # Remove any custom prompt command
+            self.child.sendline(f"PS1='{self.custom_prompt}'")  # Set our custom prompt
+
+            # Wait for the prompt to appear
+            self.child.expect(self.custom_prompt)
+
+            self.logger.info("Bash shell initialized successfully")
+
+        except Exception as e:
+            self.logger.error(f"Failed to initialize bash shell: {e}")
+            raise
+
+    def _setup_workspace(self):
+        """Set up the workspace directory."""
+        if self.workspace_root:
+            try:
+                # Create workspace directory and navigate to it
+                self._run_command_internal(f"mkdir -p {self.workspace_root}")
+                self._run_command_internal(f"cd {self.workspace_root}")
+                self.logger.info(f"Workspace set up at: {self.workspace_root}")
+            except Exception as e:
+                self.logger.warning(f"Failed to setup workspace: {e}")
+
+    def _run_command_internal(self, command: str) -> str:
+        """
+        Internal method to run a command in the persistent shell.
+
+        Args:
+            command: Command to execute
+
+        Returns:
+            Command output as string
+        """
+        if not self.child:
+            raise RuntimeError("Shell not initialized")
+
+        try:
+            # Send the command
+            self.child.sendline(command)
+
+            # Wait for the prompt to return
+            self.child.expect(self.custom_prompt)
+
+            # Get the output (everything before the prompt)
+            raw_output = self.child.before.strip()
+
+            # Clean ANSI escape sequences
+            clean_output = ANSI_ESCAPE.sub("", raw_output)
+
+            # Remove leading carriage return if present
+            if clean_output.startswith("\r"):
+                clean_output = clean_output[1:]
+
+            return clean_output
+
+        except Exception as e:
+            self.logger.error(f"Command execution failed: {e}")
+            raise
+
+    def _validate_command(self, command: str) -> Optional[str]:
+        """
+        Validate a command against security rules.
+
+        Args:
+            command: Command to validate
+
+        Returns:
+            Error message if command is invalid, None if valid
+        """
+        # Check banned commands
+        for banned in self.banned_commands:
+            if banned in command:
+                return f"Command contains banned string: '{banned}'"
+
+        # Check banned patterns
+        for pattern in self.banned_command_patterns:
+            if re.search(pattern, command, re.IGNORECASE):
+                return f"Command matches banned pattern: '{pattern}'"
+
+        return None
+
+    def _recover_shell(self):
+        """Recover the shell if it becomes unresponsive."""
+        self.logger.warning("Attempting to recover shell...")
+        try:
+            if self.child:
+                self.child.close()
+        except Exception:
+            pass
+
+        self._initialize_shell()
+        self._setup_workspace()
+        self.logger.info("Shell recovered successfully")
+
+    async def run_bash(self, command: str) -> str:
+        """
+        Execute a bash command and return its output.
+
+        This tool provides access to a persistent bash shell where you can run
+        commands, navigate directories, set environment variables, and perform
+        file operations. The shell state persists between command calls.
+
+        Security features:
+        - Commands are validated against banned patterns
+        - Output is limited to prevent excessive data
+        - Shell recovery on errors
+        - Workspace isolation
+
+        Usage guidelines:
+        - State persists across commands (cd, export, etc.)
+        - Avoid commands that produce very large output
+        - Use background processes for long-running commands (command &)
+        - Be cautious with destructive operations
+
+        Args:
+            command: The bash command to execute
+
+        Returns:
+            Command output or error message
+
+        Examples:
+            - run_bash("ls -la")
+            - run_bash("cd /tmp && pwd")
+            - run_bash("export VAR=value && echo $VAR")
+            - run_bash("python -c 'print(\"Hello World\")'")
+        """
+        self.logger.info(f"Executing bash command: {command}")
+
+        # Validate command
+        validation_error = self._validate_command(command)
+        if validation_error:
+            self.logger.warning(f"Command rejected: {validation_error}")
+            return f"Error: {validation_error}"
+
+        # Ensure shell is ready
+        if not self._shell_initialized:
+            await self.build()
+
+        try:
+            # Test shell responsiveness
+            try:
+                test_result = self._run_command_internal("echo test")
+                if "test" not in test_result:
+                    raise Exception("Shell not responding correctly")
+            except Exception:
+                self._recover_shell()
+
+            # Execute the actual command
+            result = self._run_command_internal(command)
+
+            # Limit output length
+            if len(result) > self.max_output_length:
+                result = result[: self.max_output_length] + f"\n... (output truncated, {len(result)} total characters)"
+
+            self.logger.info(f"Command executed successfully, output length: {len(result)}")
+
+            return result
+
+        except Exception as e:
+            error_msg = f"Command execution failed: {str(e)}"
+            self.logger.error(error_msg)
+
+            # Attempt recovery for next command
+            try:
+                self._recover_shell()
+            except Exception as recovery_error:
+                self.logger.error(f"Shell recovery failed: {recovery_error}")
+
+            return f"Error: {error_msg}"
+
+    async def get_current_directory(self) -> str:
+        """
+        Get the current working directory of the shell.
+
+        Returns:
+            Current directory path
+        """
+        try:
+            return await self.run_bash("pwd")
+        except Exception as e:
+            return f"Error getting current directory: {e}"
+
+    async def list_directory(self, path: str = ".") -> str:
+        """
+        List contents of a directory.
+
+        Args:
+            path: Directory path to list (default: current directory)
+
+        Returns:
+            Directory listing
+        """
+        return await self.run_bash(f"ls -la {path}")
+
+    async def get_tools_map(self) -> Dict[str, Callable]:
+        """
+        Get the mapping of tool names to their implementation functions.
+
+        Returns:
+            Dictionary mapping tool names to callable functions
+        """
+        return {
+            "run_bash": self.run_bash,
+            "get_current_directory": self.get_current_directory,
+            "list_directory": self.list_directory,
+        }
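
A minimal usage sketch for the BashToolkit added above, under the same assumption that ToolkitConfig wraps a plain config dict (its constructor is not shown in this diff); pexpect must be installed for the persistent shell to start, and run_bash also calls build() lazily if the shell has not been initialized.

    import asyncio

    from noesium.core.toolify.config import ToolkitConfig
    from noesium.toolkits.bash_toolkit import BashToolkit

    async def main():
        # Hypothetical construction; ToolkitConfig's exact signature is not shown above.
        config = ToolkitConfig(config={"workspace_root": "/tmp/noesium_workspace", "timeout": 30})
        toolkit = BashToolkit(config)

        await toolkit.build()  # spawns the persistent /bin/bash session via pexpect

        # Shell state persists across calls, so the cd below affects later commands.
        print(await toolkit.run_bash("cd /tmp && pwd"))
        print(await toolkit.get_current_directory())  # still /tmp

        # Banned strings and patterns are rejected before reaching the shell.
        print(await toolkit.run_bash("rm -rf /"))  # returns an error message

        await toolkit.cleanup()  # closes the pexpect child

    asyncio.run(main())
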