amd-gaia 0.15.0__py3-none-any.whl → 0.15.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/METADATA +222 -223
  2. amd_gaia-0.15.2.dist-info/RECORD +182 -0
  3. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/WHEEL +1 -1
  4. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/entry_points.txt +1 -0
  5. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/licenses/LICENSE.md +20 -20
  6. gaia/__init__.py +29 -29
  7. gaia/agents/__init__.py +19 -19
  8. gaia/agents/base/__init__.py +9 -9
  9. gaia/agents/base/agent.py +2132 -2177
  10. gaia/agents/base/api_agent.py +119 -120
  11. gaia/agents/base/console.py +1967 -1841
  12. gaia/agents/base/errors.py +237 -237
  13. gaia/agents/base/mcp_agent.py +86 -86
  14. gaia/agents/base/tools.py +88 -83
  15. gaia/agents/blender/__init__.py +7 -0
  16. gaia/agents/blender/agent.py +553 -556
  17. gaia/agents/blender/agent_simple.py +133 -135
  18. gaia/agents/blender/app.py +211 -211
  19. gaia/agents/blender/app_simple.py +41 -41
  20. gaia/agents/blender/core/__init__.py +16 -16
  21. gaia/agents/blender/core/materials.py +506 -506
  22. gaia/agents/blender/core/objects.py +316 -316
  23. gaia/agents/blender/core/rendering.py +225 -225
  24. gaia/agents/blender/core/scene.py +220 -220
  25. gaia/agents/blender/core/view.py +146 -146
  26. gaia/agents/chat/__init__.py +9 -9
  27. gaia/agents/chat/agent.py +809 -835
  28. gaia/agents/chat/app.py +1065 -1058
  29. gaia/agents/chat/session.py +508 -508
  30. gaia/agents/chat/tools/__init__.py +15 -15
  31. gaia/agents/chat/tools/file_tools.py +96 -96
  32. gaia/agents/chat/tools/rag_tools.py +1744 -1729
  33. gaia/agents/chat/tools/shell_tools.py +437 -436
  34. gaia/agents/code/__init__.py +7 -7
  35. gaia/agents/code/agent.py +549 -549
  36. gaia/agents/code/cli.py +377 -0
  37. gaia/agents/code/models.py +135 -135
  38. gaia/agents/code/orchestration/__init__.py +24 -24
  39. gaia/agents/code/orchestration/checklist_executor.py +1763 -1763
  40. gaia/agents/code/orchestration/checklist_generator.py +713 -713
  41. gaia/agents/code/orchestration/factories/__init__.py +9 -9
  42. gaia/agents/code/orchestration/factories/base.py +63 -63
  43. gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -118
  44. gaia/agents/code/orchestration/factories/python_factory.py +106 -106
  45. gaia/agents/code/orchestration/orchestrator.py +841 -841
  46. gaia/agents/code/orchestration/project_analyzer.py +391 -391
  47. gaia/agents/code/orchestration/steps/__init__.py +67 -67
  48. gaia/agents/code/orchestration/steps/base.py +188 -188
  49. gaia/agents/code/orchestration/steps/error_handler.py +314 -314
  50. gaia/agents/code/orchestration/steps/nextjs.py +828 -828
  51. gaia/agents/code/orchestration/steps/python.py +307 -307
  52. gaia/agents/code/orchestration/template_catalog.py +469 -469
  53. gaia/agents/code/orchestration/workflows/__init__.py +14 -14
  54. gaia/agents/code/orchestration/workflows/base.py +80 -80
  55. gaia/agents/code/orchestration/workflows/nextjs.py +186 -186
  56. gaia/agents/code/orchestration/workflows/python.py +94 -94
  57. gaia/agents/code/prompts/__init__.py +11 -11
  58. gaia/agents/code/prompts/base_prompt.py +77 -77
  59. gaia/agents/code/prompts/code_patterns.py +2034 -2036
  60. gaia/agents/code/prompts/nextjs_prompt.py +40 -40
  61. gaia/agents/code/prompts/python_prompt.py +109 -109
  62. gaia/agents/code/schema_inference.py +365 -365
  63. gaia/agents/code/system_prompt.py +41 -41
  64. gaia/agents/code/tools/__init__.py +42 -42
  65. gaia/agents/code/tools/cli_tools.py +1138 -1138
  66. gaia/agents/code/tools/code_formatting.py +319 -319
  67. gaia/agents/code/tools/code_tools.py +769 -769
  68. gaia/agents/code/tools/error_fixing.py +1347 -1347
  69. gaia/agents/code/tools/external_tools.py +180 -180
  70. gaia/agents/code/tools/file_io.py +845 -845
  71. gaia/agents/code/tools/prisma_tools.py +190 -190
  72. gaia/agents/code/tools/project_management.py +1016 -1016
  73. gaia/agents/code/tools/testing.py +321 -321
  74. gaia/agents/code/tools/typescript_tools.py +122 -122
  75. gaia/agents/code/tools/validation_parsing.py +461 -461
  76. gaia/agents/code/tools/validation_tools.py +806 -806
  77. gaia/agents/code/tools/web_dev_tools.py +1758 -1758
  78. gaia/agents/code/validators/__init__.py +16 -16
  79. gaia/agents/code/validators/antipattern_checker.py +241 -241
  80. gaia/agents/code/validators/ast_analyzer.py +197 -197
  81. gaia/agents/code/validators/requirements_validator.py +145 -145
  82. gaia/agents/code/validators/syntax_validator.py +171 -171
  83. gaia/agents/docker/__init__.py +7 -7
  84. gaia/agents/docker/agent.py +643 -642
  85. gaia/agents/emr/__init__.py +8 -8
  86. gaia/agents/emr/agent.py +1504 -1506
  87. gaia/agents/emr/cli.py +1322 -1322
  88. gaia/agents/emr/constants.py +475 -475
  89. gaia/agents/emr/dashboard/__init__.py +4 -4
  90. gaia/agents/emr/dashboard/server.py +1972 -1974
  91. gaia/agents/jira/__init__.py +11 -11
  92. gaia/agents/jira/agent.py +894 -894
  93. gaia/agents/jira/jql_templates.py +299 -299
  94. gaia/agents/routing/__init__.py +7 -7
  95. gaia/agents/routing/agent.py +567 -570
  96. gaia/agents/routing/system_prompt.py +75 -75
  97. gaia/agents/summarize/__init__.py +11 -0
  98. gaia/agents/summarize/agent.py +885 -0
  99. gaia/agents/summarize/prompts.py +129 -0
  100. gaia/api/__init__.py +23 -23
  101. gaia/api/agent_registry.py +238 -238
  102. gaia/api/app.py +305 -305
  103. gaia/api/openai_server.py +575 -575
  104. gaia/api/schemas.py +186 -186
  105. gaia/api/sse_handler.py +373 -373
  106. gaia/apps/__init__.py +4 -4
  107. gaia/apps/llm/__init__.py +6 -6
  108. gaia/apps/llm/app.py +184 -169
  109. gaia/apps/summarize/app.py +116 -633
  110. gaia/apps/summarize/html_viewer.py +133 -133
  111. gaia/apps/summarize/pdf_formatter.py +284 -284
  112. gaia/audio/__init__.py +2 -2
  113. gaia/audio/audio_client.py +439 -439
  114. gaia/audio/audio_recorder.py +269 -269
  115. gaia/audio/kokoro_tts.py +599 -599
  116. gaia/audio/whisper_asr.py +432 -432
  117. gaia/chat/__init__.py +16 -16
  118. gaia/chat/app.py +428 -430
  119. gaia/chat/prompts.py +522 -522
  120. gaia/chat/sdk.py +1228 -1225
  121. gaia/cli.py +5659 -5632
  122. gaia/database/__init__.py +10 -10
  123. gaia/database/agent.py +176 -176
  124. gaia/database/mixin.py +290 -290
  125. gaia/database/testing.py +64 -64
  126. gaia/eval/batch_experiment.py +2332 -2332
  127. gaia/eval/claude.py +542 -542
  128. gaia/eval/config.py +37 -37
  129. gaia/eval/email_generator.py +512 -512
  130. gaia/eval/eval.py +3179 -3179
  131. gaia/eval/groundtruth.py +1130 -1130
  132. gaia/eval/transcript_generator.py +582 -582
  133. gaia/eval/webapp/README.md +167 -167
  134. gaia/eval/webapp/package-lock.json +875 -875
  135. gaia/eval/webapp/package.json +20 -20
  136. gaia/eval/webapp/public/app.js +3402 -3402
  137. gaia/eval/webapp/public/index.html +87 -87
  138. gaia/eval/webapp/public/styles.css +3661 -3661
  139. gaia/eval/webapp/server.js +415 -415
  140. gaia/eval/webapp/test-setup.js +72 -72
  141. gaia/installer/__init__.py +23 -0
  142. gaia/installer/init_command.py +1275 -0
  143. gaia/installer/lemonade_installer.py +619 -0
  144. gaia/llm/__init__.py +10 -2
  145. gaia/llm/base_client.py +60 -0
  146. gaia/llm/exceptions.py +12 -0
  147. gaia/llm/factory.py +70 -0
  148. gaia/llm/lemonade_client.py +3421 -3221
  149. gaia/llm/lemonade_manager.py +294 -294
  150. gaia/llm/providers/__init__.py +9 -0
  151. gaia/llm/providers/claude.py +108 -0
  152. gaia/llm/providers/lemonade.py +118 -0
  153. gaia/llm/providers/openai_provider.py +79 -0
  154. gaia/llm/vlm_client.py +382 -382
  155. gaia/logger.py +189 -189
  156. gaia/mcp/agent_mcp_server.py +245 -245
  157. gaia/mcp/blender_mcp_client.py +138 -138
  158. gaia/mcp/blender_mcp_server.py +648 -648
  159. gaia/mcp/context7_cache.py +332 -332
  160. gaia/mcp/external_services.py +518 -518
  161. gaia/mcp/mcp_bridge.py +811 -550
  162. gaia/mcp/servers/__init__.py +6 -6
  163. gaia/mcp/servers/docker_mcp.py +83 -83
  164. gaia/perf_analysis.py +361 -0
  165. gaia/rag/__init__.py +10 -10
  166. gaia/rag/app.py +293 -293
  167. gaia/rag/demo.py +304 -304
  168. gaia/rag/pdf_utils.py +235 -235
  169. gaia/rag/sdk.py +2194 -2194
  170. gaia/security.py +183 -163
  171. gaia/talk/app.py +287 -289
  172. gaia/talk/sdk.py +538 -538
  173. gaia/testing/__init__.py +87 -87
  174. gaia/testing/assertions.py +330 -330
  175. gaia/testing/fixtures.py +333 -333
  176. gaia/testing/mocks.py +493 -493
  177. gaia/util.py +46 -46
  178. gaia/utils/__init__.py +33 -33
  179. gaia/utils/file_watcher.py +675 -675
  180. gaia/utils/parsing.py +223 -223
  181. gaia/version.py +100 -100
  182. amd_gaia-0.15.0.dist-info/RECORD +0 -168
  183. gaia/agents/code/app.py +0 -266
  184. gaia/llm/llm_client.py +0 -723
  185. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/top_level.txt +0 -0
gaia/agents/chat/agent.py CHANGED
@@ -1,835 +1,809 @@
1
- # Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
2
- # SPDX-License-Identifier: MIT
3
- """
4
- Chat Agent - Interactive chat with RAG and file search capabilities.
5
- """
6
-
7
- import os
8
- from dataclasses import dataclass, field
9
- from pathlib import Path
10
- from typing import Any, Dict, List, Optional
11
-
12
- try:
13
- from watchdog.observers import Observer
14
- except ImportError:
15
- Observer = None
16
-
17
- from gaia.agents.base.agent import Agent
18
- from gaia.agents.base.console import AgentConsole
19
- from gaia.agents.chat.session import SessionManager
20
- from gaia.agents.chat.tools import FileToolsMixin, RAGToolsMixin, ShellToolsMixin
21
- from gaia.agents.tools import FileSearchToolsMixin # Shared file search tools
22
- from gaia.logger import get_logger
23
- from gaia.rag.sdk import RAGSDK, RAGConfig
24
- from gaia.security import PathValidator
25
- from gaia.utils.file_watcher import FileChangeHandler, check_watchdog_available
26
-
27
- logger = get_logger(__name__)
28
-
29
-
30
- @dataclass
31
- class ChatAgentConfig:
32
- """Configuration for ChatAgent."""
33
-
34
- # LLM settings
35
- use_claude: bool = False
36
- use_chatgpt: bool = False
37
- claude_model: str = "claude-sonnet-4-20250514"
38
- base_url: str = "http://localhost:8000/api/v1"
39
- model_id: Optional[str] = None # None = use default Qwen3-Coder-30B
40
-
41
- # Execution settings
42
- max_steps: int = 10
43
- streaming: bool = False # Use --streaming to enable
44
-
45
- # Debug/output settings
46
- debug: bool = False
47
- debug_prompts: bool = False # Backward compatibility
48
- show_prompts: bool = False
49
- show_stats: bool = False
50
- silent_mode: bool = False
51
- output_dir: Optional[str] = None
52
-
53
- # RAG settings
54
- rag_documents: List[str] = field(default_factory=list)
55
- watch_directories: List[str] = field(default_factory=list)
56
- chunk_size: int = 500
57
- chunk_overlap: int = 100
58
- max_chunks: int = 5
59
- use_llm_chunking: bool = False # Use fast heuristic-based chunking by default
60
-
61
- # Security
62
- allowed_paths: Optional[List[str]] = None
63
-
64
-
65
- class ChatAgent(
66
- Agent, RAGToolsMixin, FileToolsMixin, ShellToolsMixin, FileSearchToolsMixin
67
- ):
68
- """
69
- Chat Agent with RAG, file operations, and shell command capabilities.
70
-
71
- This agent provides:
72
- - Document Q&A using RAG
73
- - File search and operations
74
- - Shell command execution
75
- - Auto-indexing when files change
76
- - Interactive chat interface
77
- - Session persistence with auto-save
78
- - MCP server integration
79
- """
80
-
81
- # Define simple tools that can execute without requiring a multi-step plan
82
- SIMPLE_TOOLS = [
83
- "list_indexed_documents",
84
- "rag_status",
85
- "query_documents",
86
- "query_specific_file",
87
- "search_indexed_chunks", # RAG: Search indexed document chunks
88
- "dump_document", # RAG: Export cached extracted text
89
- "search_file_content", # Shared: Grep-like disk search
90
- "search_file", # Shared: Find files by name
91
- "search_directory", # Shared: Find directories by name
92
- "read_file", # Shared: Read any file
93
- "write_file", # Shared: Write any file
94
- "index_directory", # RAG: Index directory
95
- "run_shell_command", # Shell: Execute commands
96
- ]
97
-
98
- def __init__(self, config: Optional[ChatAgentConfig] = None):
99
- """
100
- Initialize Chat Agent.
101
-
102
- Args:
103
- config: ChatAgentConfig object with all settings. If None, uses defaults.
104
- """
105
- # Use provided config or create default
106
- if config is None:
107
- config = ChatAgentConfig()
108
-
109
- # Initialize path validator
110
- self.path_validator = PathValidator(config.allowed_paths)
111
-
112
- # Now use config for all initialization
113
- # Store RAG configuration from config
114
- self.rag_documents = config.rag_documents
115
- self.watch_directories = config.watch_directories
116
- self.chunk_size = config.chunk_size
117
- self.max_chunks = config.max_chunks
118
-
119
- # Security: Configure allowed paths for file operations
120
- # If None, allow current directory and subdirectories
121
- if config.allowed_paths is None:
122
- self.allowed_paths = [Path.cwd()]
123
- else:
124
- self.allowed_paths = [Path(p).resolve() for p in config.allowed_paths]
125
-
126
- # Use Qwen3-Coder-30B by default for better JSON parsing (same as Jira agent)
127
- effective_model_id = config.model_id or "Qwen3-Coder-30B-A3B-Instruct-GGUF"
128
-
129
- # Debug logging for model selection
130
- logger.debug(
131
- f"Model selection: model_id={repr(config.model_id)}, effective={effective_model_id}"
132
- )
133
-
134
- # Store model for display
135
- self.model_display_name = effective_model_id
136
-
137
- # Store max_chunks for adaptive retrieval
138
- self.base_max_chunks = config.max_chunks
139
-
140
- # Initialize RAG SDK (optional - will be None if dependencies not installed)
141
- try:
142
- rag_config = RAGConfig(
143
- model=effective_model_id,
144
- chunk_size=config.chunk_size,
145
- chunk_overlap=config.chunk_overlap, # Configurable overlap for context preservation
146
- max_chunks=config.max_chunks,
147
- show_stats=config.show_stats,
148
- use_local_llm=not (config.use_claude or config.use_chatgpt),
149
- use_llm_chunking=config.use_llm_chunking, # Enable semantic chunking
150
- base_url=config.base_url, # Pass base_url to RAG for VLM client
151
- )
152
- self.rag = RAGSDK(rag_config)
153
- except ImportError as e:
154
- # RAG dependencies not installed - this is fine, RAG features will be disabled
155
- logger.debug(f"RAG dependencies not available: {e}")
156
- self.rag = None
157
-
158
- # File system monitoring
159
- self.observers = []
160
- self.file_handlers = [] # Track FileChangeHandler instances for telemetry
161
- self.indexed_files = set()
162
-
163
- # Session management
164
- self.session_manager = SessionManager()
165
- self.current_session = None
166
- self.conversation_history: List[Dict[str, str]] = (
167
- []
168
- ) # Track conversation for persistence
169
-
170
- # Call parent constructor
171
- super().__init__(
172
- use_claude=config.use_claude,
173
- use_chatgpt=config.use_chatgpt,
174
- claude_model=config.claude_model,
175
- base_url=config.base_url,
176
- model_id=effective_model_id, # Pass the effective model to parent
177
- max_steps=config.max_steps,
178
- debug_prompts=config.debug_prompts,
179
- show_prompts=config.show_prompts,
180
- output_dir=config.output_dir,
181
- streaming=config.streaming,
182
- show_stats=config.show_stats,
183
- silent_mode=config.silent_mode,
184
- debug=config.debug,
185
- )
186
-
187
- # Index initial documents (only if RAG is available)
188
- if self.rag_documents and self.rag:
189
- self._index_documents(self.rag_documents)
190
- elif self.rag_documents and not self.rag:
191
- logger.warning(
192
- "RAG dependencies not installed. Cannot index documents. "
193
- 'Install with: uv pip install -e ".[rag]"'
194
- )
195
-
196
- # Start watching directories
197
- if self.watch_directories:
198
- self._start_watching()
199
-
200
- def _post_process_tool_result(
201
- self, tool_name: str, _tool_args: Dict[str, Any], tool_result: Dict[str, Any]
202
- ) -> None:
203
- """
204
- Post-process tool results for Chat Agent.
205
-
206
- Handles RAG-specific debug information display.
207
-
208
- Args:
209
- tool_name: Name of the tool that was executed
210
- _tool_args: Arguments that were passed to the tool (unused)
211
- tool_result: Result returned by the tool
212
- """
213
- # Handle RAG query debug information
214
- if (
215
- tool_name
216
- in ["query_documents", "query_specific_file", "search_indexed_chunks"]
217
- and isinstance(tool_result, dict)
218
- and "debug_info" in tool_result
219
- and self.debug
220
- ):
221
- debug_info = tool_result.get("debug_info")
222
- print("[DEBUG] RAG Query Debug Info:")
223
- print(f" - Search keys: {debug_info.get('search_keys', [])}")
224
- print(
225
- f" - Total chunks found: {debug_info.get('total_chunks_before_dedup', 0)}"
226
- )
227
- print(
228
- f" - After deduplication: {debug_info.get('total_chunks_after_dedup', 0)}"
229
- )
230
- print(
231
- f" - Final chunks returned: {debug_info.get('final_chunks_returned', 0)}"
232
- )
233
-
234
- def _get_system_prompt(self) -> str:
235
- """Generate the system prompt for the Chat Agent."""
236
- # Get list of indexed documents
237
- indexed_docs_section = ""
238
- if hasattr(self, "rag") and self.rag and self.rag.indexed_files:
239
- doc_names = []
240
- for file_path in self.rag.indexed_files:
241
- doc_names.append(Path(file_path).name)
242
-
243
- indexed_docs_section = f"""
244
- **CURRENTLY INDEXED DOCUMENTS:**
245
- You have {len(doc_names)} document(s) already indexed and ready to search:
246
- {chr(10).join(f'- {name}' for name in sorted(doc_names))}
247
-
248
- When the user asks a question about content, you can DIRECTLY search these documents using query_documents or query_specific_file.
249
- You do NOT need to check what's indexed first - this list is always up-to-date.
250
- """
251
- else:
252
- indexed_docs_section = """
253
- **CURRENTLY INDEXED DOCUMENTS:**
254
- No documents are currently indexed.
255
-
256
- **IMPORTANT: When no documents are indexed, act as a normal conversational AI assistant.**
257
- - Answer general questions using your knowledge
258
- - Have natural conversations with the user
259
- - Do NOT try to search for documents unless the user explicitly asks to index/search files
260
- - Do NOT use query_documents or query_specific_file when no documents are indexed
261
- - Only use RAG tools when the user explicitly asks to index documents or search their files
262
- """
263
-
264
- # Build the prompt with indexed documents section
265
- # NOTE: Base agent now provides JSON format rules, so we only add ChatAgent-specific guidance
266
- base_prompt = """You are a helpful AI assistant with document search and RAG capabilities.
267
- """
268
-
269
- # Add indexed documents section
270
- prompt = (
271
- base_prompt
272
- + indexed_docs_section
273
- + """
274
- **WHEN TO USE TOOLS VS DIRECT ANSWERS:**
275
-
276
- Use Format 1 (answer) for:
277
- - Greetings: {"answer": "Hello! How can I help?"}
278
- - Thanks: {"answer": "You're welcome!"}
279
- - **General knowledge questions**: {"answer": "Kalin is a name of Slavic origin meaning..."}
280
- - **Conversation and chat**: {"answer": "That's interesting! Tell me more about..."}
281
- - Out-of-scope: {"answer": "I don't have weather data..."}
282
- - **FINAL ANSWERS after retrieving data**: {"answer": "According to the document, the vision is..."}
283
-
284
- **IMPORTANT: If no documents are indexed, answer ALL questions using general knowledge!**
285
-
286
- Use Format 2 (tool) ONLY when:
287
- - User explicitly asks to search/index files OR documents are already indexed
288
- - "what files are indexed?" {"tool": "list_indexed_documents", "tool_args": {}}
289
- - "search for X" → {"tool": "query_documents", "tool_args": {"query": "X"}}
290
- - "what does doc say?" {"tool": "query_specific_file", "tool_args": {...}}
291
- - "find the oil and gas manual" → {"tool": "search_file", "tool_args": {"file_pattern": "oil and gas manual"}}
292
- - "index my data folder" → {"tool": "search_directory", "tool_args": {"directory_name": "data"}}
293
- - "index files in /path/to/dir" → {"tool": "index_directory", "tool_args": {"directory_path": "/path/to/dir"}}
294
-
295
- **CRITICAL: NEVER make up or guess user data. Always use tools.**
296
-
297
- **SMART DISCOVERY WORKFLOW:**
298
-
299
- When user asks a domain-specific question (e.g., "what is the vision of the oil & gas regulator?"):
300
- 1. Check if relevant documents are indexed
301
- 2. If NO relevant documents found:
302
- a. Extract key terms from question (e.g., "oil", "gas", "regulator")
303
- b. Search for files using search_file with those terms
304
- c. If files found, index them automatically
305
- d. Provide status update: "Found and indexed X file(s)"
306
- e. Then query to answer the question
307
- 3. If documents already indexed, query directly
308
-
309
- Example Smart Discovery:
310
- User: "what is the vision of the oil & gas regulator?"
311
- You: {"tool": "list_indexed_documents", "tool_args": {}}
312
- Result: {"documents": [], "count": 0}
313
- You: {"tool": "search_file", "tool_args": {"file_pattern": "oil gas"}}
314
- Result: {"files": ["/docs/Oil-Gas-Manual.pdf"], "count": 1}
315
- You: {"tool": "index_document", "tool_args": {"file_path": "/docs/Oil-Gas-Manual.pdf"}}
316
- Result: {"status": "success", "chunks": 150}
317
- You: {"thought": "Document indexed, now searching for vision", "tool": "query_specific_file", "tool_args": {"file_path": "/docs/Oil-Gas-Manual.pdf", "query": "vision of the oil gas regulator"}}
318
- Result: {"chunks": ["The vision is to be recognized..."], "scores": [0.92]}
319
- You: {"answer": "According to the Oil & Gas Manual, the vision is to be recognized..."}
320
-
321
- **CONTEXT INFERENCE RULE:**
322
-
323
- When user asks a question without specifying which document:
324
- 1. Check the "CURRENTLY INDEXED DOCUMENTS" section above - you already know what's indexed!
325
- 2. If EXACTLY 1 document indexed **IMMEDIATELY search it**: {"tool": "query_documents", "tool_args": {"query": "..."}}
326
- 3. If 0 documents Use Smart Discovery workflow to find and index relevant files
327
- 4. If multiple documents Search all with query_documents OR ask which specific one: {"answer": "Which document? You have: [list]"}
328
-
329
- **AVAILABLE TOOLS:**
330
- The complete list of available tools with their descriptions is provided below in the AVAILABLE TOOLS section.
331
- Tools are grouped by category: RAG tools, File System tools, Shell tools, etc.
332
-
333
- **FILE SEARCH AND AUTO-INDEX WORKFLOW:**
334
- When user asks "find the X manual" or "find X document on my drive":
335
- 1. Use search_file (automatically searches all drives intelligently):
336
- - Phase 1: Searches common locations (Documents, Downloads, Desktop) - FAST
337
- - Phase 2: If not found, deep search entire drive(s) - THOROUGH
338
- - Filters by document file types (.pdf, .docx, .txt, etc.)
339
- 2. Handle results:
340
- - **If 1 file found**: Automatically index it
341
- - **If multiple files found**: Display numbered list, ask user to select
342
- - **If none found**: Inform user
343
- 3. After indexing, confirm and let user know they can ask questions
344
-
345
- **IMPORTANT: Always show tool results with display_message!**
346
- Tools like search_file return a 'display_message' field - ALWAYS show this to the user:
347
-
348
- Example:
349
- Tool result: {"display_message": " Found 2 file(s) in current directory (gaia)", "file_list": [...]}
350
- You must say: {"answer": " Found 2 file(s) in current directory (gaia):\n1. Oil-Gas-Manual.pdf\n..."}
351
-
352
- NOTE: Progress indicators (spinners) are shown automatically by the tool while searching.
353
- You don't need to say "searching..." - the tool displays it live!
354
-
355
- Example (Single file):
356
- User: "Can you find the oil and gas manual on my drive?"
357
- You: {"tool": "search_file", "tool_args": {"file_pattern": "oil gas"}}
358
- Result: {"files": [...], "count": 1, "display_message": "🔍 Found 1 matching file(s)", "file_list": [{"number": 1, "name": "Oil-Gas-Manual.pdf", "directory": "C:/Users/user/Documents"}]}
359
- You: {"answer": "🔍 Searching for 'oil gas'... Found 1 file:\n• Oil-Gas-Manual.pdf (Documents folder)\n\nIndexing now..."}
360
- You: {"tool": "index_document", "tool_args": {"file_path": "C:/Users/user/Documents/Oil-Gas-Manual.pdf"}}
361
- You: {"answer": "✓ Indexed Oil-Gas-Manual.pdf (150 chunks). You can now ask me questions about it!"}
362
-
363
- Example (Multiple files):
364
- User: "Find the manual on my drive"
365
- You: {"answer": "🔍 Searching your drive for 'manual'..."}
366
- You: {"tool": "search_file", "tool_args": {"file_pattern": "manual"}}
367
- Result: {"count": 3, "file_list": [{"number": 1, "name": "Oil-Gas-Manual.pdf", "directory": "C:/Docs"}, {"number": 2, "name": "Safety-Manual.pdf", "directory": "C:/Downloads"}]}
368
- You: {"answer": "Found 3 matching files:\n\n1. Oil-Gas-Manual.pdf (C:/Docs/)\n2. Safety-Manual.pdf (C:/Downloads/)\n3. Training-Manual.pdf (C:/Work/)\n\nWhich one would you like me to index? (enter the number)"}
369
- User: "1"
370
- You: {"tool": "index_document", "tool_args": {"file_path": "C:/Docs/Oil-Gas-Manual.pdf"}}
371
- You: {"answer": "✓ Indexed Oil-Gas-Manual.pdf. You can now ask questions about it!"}
372
-
373
- **DIRECTORY INDEXING WORKFLOW:**
374
- When user asks to "index my data folder" or similar:
375
- 1. Use search_directory to find matching directories
376
- 2. Show user the matches and ask which one (if multiple)
377
- 3. Use index_directory on the chosen path
378
- 4. Report indexing results"""
379
- )
380
-
381
- return prompt
382
-
383
- def _create_console(self):
384
- """Create console for chat agent."""
385
- from gaia.agents.base.console import SilentConsole
386
-
387
- if self.silent_mode:
388
- # For chat agent, we ALWAYS want to show the final answer
389
- # Even in silent mode, the user needs to see the response
390
- return SilentConsole(silence_final_answer=False)
391
- return AgentConsole()
392
-
393
- def _generate_search_keys(self, query: str) -> List[str]:
394
- """
395
- Generate search keys from query for better retrieval.
396
- Extracts keywords and reformulates query for improved matching.
397
-
398
- Args:
399
- query: User query
400
-
401
- Returns:
402
- List of search keys/queries
403
- """
404
- keys = [query] # Always include original query
405
-
406
- # Extract potential keywords (simple approach)
407
- # Remove common words and extract meaningful terms
408
- stop_words = {
409
- "what",
410
- "how",
411
- "when",
412
- "where",
413
- "who",
414
- "why",
415
- "is",
416
- "are",
417
- "was",
418
- "were",
419
- "the",
420
- "a",
421
- "an",
422
- "and",
423
- "or",
424
- "but",
425
- "in",
426
- "on",
427
- "at",
428
- "to",
429
- "for",
430
- "of",
431
- "with",
432
- "by",
433
- "from",
434
- "about",
435
- "can",
436
- "could",
437
- "would",
438
- "should",
439
- "do",
440
- "does",
441
- "did",
442
- "tell",
443
- "me",
444
- "you",
445
- }
446
-
447
- words = query.lower().split()
448
- keywords = [
449
- w.strip("?,.:;!")
450
- for w in words
451
- if w.lower() not in stop_words and len(w) > 2
452
- ]
453
-
454
- # Add keyword-based query (only if different from original)
455
- if keywords:
456
- keyword_query = " ".join(keywords)
457
- if keyword_query != query: # Avoid duplicates
458
- keys.append(keyword_query)
459
-
460
- # Add question reformulations for common patterns
461
- if query.lower().startswith("what is"):
462
- topic = query[8:].strip("?").strip()
463
- keys.append(f"{topic} definition")
464
- keys.append(f"{topic} explanation")
465
- elif query.lower().startswith("how to"):
466
- topic = query[7:].strip("?").strip()
467
- keys.append(f"{topic} steps")
468
- keys.append(f"{topic} guide")
469
-
470
- logger.debug(f"Generated search keys: {keys}")
471
- return keys
472
-
473
- def _is_path_allowed(self, path: str) -> bool:
474
- """
475
- Check if a path is within allowed directories.
476
- Uses real path resolution to prevent TOCTOU attacks.
477
-
478
- Args:
479
- path: Path to validate
480
-
481
- Returns:
482
- True if path is allowed, False otherwise
483
- """
484
- try:
485
- # Resolve path using os.path.realpath to follow symlinks
486
- # This prevents TOCTOU attacks by resolving at check time
487
- real_path = Path(os.path.realpath(path)).resolve()
488
-
489
- # Check if real path is within any allowed directory
490
- for allowed_path in self.allowed_paths:
491
- try:
492
- # is_relative_to requires Python 3.9+, use alternative for compatibility
493
- real_path.relative_to(allowed_path)
494
- return True
495
- except ValueError:
496
- continue
497
-
498
- return False
499
- except Exception as e:
500
- logger.error(f"Error validating path {path}: {e}")
501
- return False
502
-
503
- def _validate_and_open_file(self, file_path: str, mode: str = "r"):
504
- """
505
- Safely open a file with path validation using O_NOFOLLOW to prevent TOCTOU attacks.
506
-
507
- This method prevents Time-of-Check-Time-of-Use vulnerabilities by:
508
- 1. Using O_NOFOLLOW flag to reject symlinks
509
- 2. Opening file with low-level os.open() before validation
510
- 3. Validating the opened file descriptor, not the path
511
-
512
- Args:
513
- file_path: Path to the file
514
- mode: File open mode ('r', 'w', 'rb', 'wb', etc.)
515
-
516
- Returns:
517
- File handle if successful
518
-
519
- Raises:
520
- PermissionError: If path is not allowed or is a symlink
521
- IOError: If file cannot be opened
522
- """
523
- import stat
524
-
525
- try:
526
- # Determine open flags based on mode
527
- if "r" in mode and "+" not in mode:
528
- flags = os.O_RDONLY
529
- elif "w" in mode:
530
- flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
531
- elif "a" in mode:
532
- flags = os.O_WRONLY | os.O_CREAT | os.O_APPEND
533
- elif "+" in mode:
534
- flags = os.O_RDWR
535
- else:
536
- flags = os.O_RDONLY
537
-
538
- # CRITICAL: Add O_NOFOLLOW to reject symlinks
539
- # This prevents TOCTOU attacks where symlinks are swapped
540
- if hasattr(os, "O_NOFOLLOW"):
541
- flags |= os.O_NOFOLLOW
542
-
543
- # Open the file at low level (doesn't follow symlinks with O_NOFOLLOW)
544
- try:
545
- fd = os.open(file_path, flags)
546
- except OSError as e:
547
- if e.errno == 40: # ELOOP - too many symbolic links
548
- raise PermissionError(f"Symlinks not allowed: {file_path}")
549
- raise IOError(f"Cannot open file {file_path}: {e}")
550
-
551
- # Get the real path of the opened file descriptor
552
- # On Linux, we can use /proc/self/fd/
553
- # On other systems, use fstat
554
- try:
555
- file_stat = os.fstat(fd)
556
-
557
- # Verify it's a regular file, not a directory or special file
558
- if not stat.S_ISREG(file_stat.st_mode):
559
- os.close(fd)
560
- raise PermissionError(f"Not a regular file: {file_path}")
561
-
562
- # Get the real path (Linux-specific, but works on most Unix)
563
- if os.path.exists(f"/proc/self/fd/{fd}"):
564
- real_path = Path(os.readlink(f"/proc/self/fd/{fd}")).resolve()
565
- else:
566
- # Fallback for non-Linux systems
567
- real_path = Path(file_path).resolve()
568
-
569
- # Validate the real path is within allowed directories
570
- path_allowed = False
571
- for allowed_path in self.allowed_paths:
572
- try:
573
- real_path.relative_to(allowed_path)
574
- path_allowed = True
575
- break
576
- except ValueError:
577
- continue
578
-
579
- if not path_allowed:
580
- os.close(fd)
581
- raise PermissionError(
582
- f"Access denied to path: {real_path}\n"
583
- f"Requested: {file_path}\n"
584
- f"Resolved to path outside allowed directories"
585
- )
586
-
587
- # Convert file descriptor to Python file object
588
- if "b" in mode:
589
- return os.fdopen(fd, mode)
590
- else:
591
- return os.fdopen(fd, mode, encoding="utf-8")
592
-
593
- except Exception:
594
- os.close(fd)
595
- raise
596
-
597
- except PermissionError:
598
- raise
599
- except Exception as e:
600
- raise IOError(f"Failed to securely open file {file_path}: {e}")
601
-
602
- def _auto_save_session(self) -> None:
603
- """Auto-save current session (called after important operations)."""
604
- try:
605
- if self.current_session:
606
- self.save_current_session()
607
- if self.debug:
608
- logger.debug(
609
- f"Auto-saved session: {self.current_session.session_id}"
610
- )
611
- except Exception as e:
612
- logger.warning(f"Auto-save failed: {e}")
613
-
614
- def _register_tools(self) -> None:
615
- """Register chat agent tools from mixins."""
616
- # Register tools from mixins
617
- self.register_rag_tools()
618
- self.register_file_tools()
619
- self.register_shell_tools()
620
- self.register_file_search_tools() # Shared file search tools
621
-
622
- # NOTE: The actual tool definitions are in the mixin classes:
623
- # - RAGToolsMixin (rag_tools.py): RAG and document indexing tools
624
- # - FileToolsMixin (file_tools.py): Directory monitoring
625
- # - ShellToolsMixin (shell_tools.py): Shell command execution
626
- # - FileSearchToolsMixin (shared): File and directory search across drives
627
-
628
- def _index_documents(self, documents: List[str]) -> None:
629
- """Index initial documents."""
630
- for doc in documents:
631
- try:
632
- if os.path.exists(doc):
633
- logger.info(f"Indexing document: {doc}")
634
- result = self.rag.index_document(doc)
635
-
636
- if result.get("success"):
637
- self.indexed_files.add(doc)
638
- logger.info(
639
- f"Successfully indexed: {doc} ({result.get('num_chunks', 0)} chunks)"
640
- )
641
- else:
642
- error = result.get("error", "Unknown error")
643
- logger.error(f"Failed to index {doc}: {error}")
644
- else:
645
- logger.warning(f"Document not found: {doc}")
646
- except Exception as e:
647
- logger.error(f"Failed to index {doc}: {e}")
648
-
649
- # Update system prompt after indexing to include the new documents
650
- self._update_system_prompt()
651
-
652
- def _update_system_prompt(self) -> None:
653
- """Update the system prompt with current indexed documents."""
654
- # Regenerate the system prompt with updated document list
655
- self.system_prompt = self._get_system_prompt()
656
-
657
- # Add the tools description using the parent class method
658
- tools_description = self._format_tools_for_prompt()
659
- self.system_prompt += f"\n\n==== AVAILABLE TOOLS ====\n{tools_description}\n\n"
660
-
661
- if self.rag:
662
- logger.debug(
663
- f"Updated system prompt with {len(self.rag.indexed_files)} indexed documents"
664
- )
665
-
666
- def _start_watching(self) -> None:
667
- """Start watching directories for changes."""
668
- for directory in self.watch_directories:
669
- self._watch_directory(directory)
670
-
671
def _watch_directory(self, directory: str) -> None:
    """Start a recursive file-system observer on ``directory``.

    Args:
        directory: Directory to observe for created/modified/deleted/moved files.

    Raises:
        ImportError: If the watchdog package is not available.
    """
    if not check_watchdog_available():
        error_msg = "".join(
            (
                "\n❌ Error: Missing required package 'watchdog'\n\n",
                "File watching requires the watchdog package.\n",
                "Please install the required dependencies:\n",
                ' uv pip install -e ".[dev]"\n\n',
                "Or install watchdog directly:\n",
                ' uv pip install "watchdog>=2.1.0"\n',
            )
        )
        logger.error(error_msg)
        raise ImportError(error_msg)

    try:
        # Generic FileChangeHandler wired to this agent's callbacks.
        callbacks = {
            "on_created": self.reindex_file,
            "on_modified": self.reindex_file,
            "on_deleted": self._handle_file_deletion,
            "on_moved": self._handle_file_move,
        }
        handler = FileChangeHandler(**callbacks)
        watcher = Observer()
        watcher.schedule(handler, directory, recursive=True)
        watcher.start()
        self.observers.append(watcher)
        logger.info(f"Started watching: {directory}")
    except Exception as exc:
        # Failing to watch one directory is logged, not raised.
        logger.error(f"Failed to watch {directory}: {exc}")
700
-
701
- def _handle_file_deletion(self, file_path: str) -> None:
702
- """Handle file deletion by removing it from the index."""
703
- if not self.rag:
704
- return
705
-
706
- try:
707
- file_abs_path = str(Path(file_path).absolute())
708
- if file_abs_path in self.indexed_files:
709
- logger.info(f"File deleted, removing from index: {file_path}")
710
- if self.rag.remove_document(file_abs_path):
711
- self.indexed_files.discard(file_abs_path)
712
- logger.info(
713
- f"Successfully removed deleted file from index: {file_path}"
714
- )
715
- else:
716
- logger.warning(
717
- f"Failed to remove deleted file from index: {file_path}"
718
- )
719
- except Exception as e:
720
- logger.error(f"Error handling file deletion {file_path}: {e}")
721
-
722
- def _handle_file_move(self, src_path: str, dest_path: str) -> None:
723
- """Handle file move by removing old path and indexing new path."""
724
- self._handle_file_deletion(src_path)
725
- self.reindex_file(dest_path)
726
-
727
def reindex_file(self, file_path: str) -> None:
    """Re-index a file that was created or modified.

    Failures are logged; nothing is raised to the caller.

    Args:
        file_path: Path of the file to re-index.
    """
    if not self.rag:
        logger.warning(f"Cannot reindex {file_path}: RAG dependencies not installed")
        return

    try:
        logger.info(f"Reindexing: {file_path}")
        # reindex_document removes the old chunks before indexing again.
        outcome = self.rag.reindex_document(file_path)
        if not outcome.get("success"):
            error = outcome.get("error", "Unknown error")
            logger.error(f"Failed to reindex {file_path}: {error}")
            return
        self.indexed_files.add(file_path)
        logger.info(f"Successfully reindexed {file_path}")
    except Exception as exc:
        logger.error(f"Failed to reindex {file_path}: {exc}")
747
-
748
def stop_watching(self) -> None:
    """Stop and join every registered observer, then empty the observer list."""
    watchers = self.observers
    for watcher in watchers:
        watcher.stop()
        watcher.join()
    watchers.clear()
754
-
755
def load_session(self, session_id: str) -> bool:
    """
    Load a saved session and restore its documents, watchers and history.

    A document is only recorded in ``self.indexed_files`` when the RAG SDK
    reports ``success`` for it, matching the check in ``_index_documents``.
    Documents that fail to re-index are logged and skipped.

    Args:
        session_id: Session ID to load

    Returns:
        True if successful, False if the session is missing or restoring
        it raised an exception.
    """
    try:
        session = self.session_manager.load_session(session_id)
        if not session:
            logger.error(f"Session not found: {session_id}")
            return False

        self.current_session = session

        # Restore indexed documents (only if RAG is available)
        if self.rag:
            for doc_path in session.indexed_documents:
                if not os.path.exists(doc_path):
                    continue
                try:
                    result = self.rag.index_document(doc_path)
                    if result.get("success"):
                        self.indexed_files.add(doc_path)
                    else:
                        # Do not record a document the SDK failed to index.
                        error = result.get("error", "Unknown error")
                        logger.warning(f"Failed to reindex {doc_path}: {error}")
                except Exception as e:
                    logger.warning(f"Failed to reindex {doc_path}: {e}")
        elif session.indexed_documents:
            logger.warning(
                f"Cannot restore {len(session.indexed_documents)} indexed documents: "
                "RAG dependencies not installed"
            )

        # Restore watched directories, skipping ones already being watched
        for dir_path in session.watched_directories:
            if os.path.exists(dir_path) and dir_path not in self.watch_directories:
                self.watch_directories.append(dir_path)
                self._watch_directory(dir_path)

        # Restore conversation history (copied so the session keeps its own list)
        self.conversation_history = list(session.chat_history)

        logger.info(
            f"Loaded session {session_id}: {len(session.indexed_documents)} docs, {len(session.chat_history)} messages"
        )
        return True

    except Exception as e:
        logger.error(f"Error loading session: {e}")
        return False
805
-
806
def save_current_session(self) -> bool:
    """
    Persist the current session, creating one first if none exists.

    Returns:
        The result of the session manager's save call, or False if an
        exception was raised.
    """
    try:
        if not self.current_session:
            # No session yet: create one to hold the snapshot.
            self.current_session = self.session_manager.create_session()

        session = self.current_session
        # Snapshot the agent state as fresh lists.
        session.indexed_documents = list(self.indexed_files)
        session.watched_directories = list(self.watch_directories)
        session.chat_history = list(self.conversation_history)

        return self.session_manager.save_session(session)
    except Exception as exc:
        logger.error(f"Error saving session: {exc}")
        return False
829
-
830
- def __del__(self):
831
- """Cleanup when agent is destroyed."""
832
- try:
833
- self.stop_watching()
834
- except Exception as e:
835
- logger.error(f"Error stopping file watchers during cleanup: {e}")
1
+ # Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
2
+ # SPDX-License-Identifier: MIT
3
+ """
4
+ Chat Agent - Interactive chat with RAG and file search capabilities.
5
+ """
6
+
7
+ import os
8
+ from dataclasses import dataclass, field
9
+ from pathlib import Path
10
+ from typing import Any, Dict, List, Optional
11
+
12
+ try:
13
+ from watchdog.observers import Observer
14
+ except ImportError:
15
+ Observer = None
16
+
17
+ from gaia.agents.base.agent import Agent
18
+ from gaia.agents.base.console import AgentConsole
19
+ from gaia.agents.chat.session import SessionManager
20
+ from gaia.agents.chat.tools import FileToolsMixin, RAGToolsMixin, ShellToolsMixin
21
+ from gaia.agents.tools import FileSearchToolsMixin # Shared file search tools
22
+ from gaia.logger import get_logger
23
+ from gaia.rag.sdk import RAGSDK, RAGConfig
24
+ from gaia.security import PathValidator
25
+ from gaia.utils.file_watcher import FileChangeHandler, check_watchdog_available
26
+
27
+ logger = get_logger(__name__)
28
+
29
+
30
@dataclass
class ChatAgentConfig:
    """Configuration for ChatAgent.

    Groups every setting read by ``ChatAgent.__init__``: LLM backend
    selection, execution limits, debug/output switches, RAG parameters and
    the file-access allow-list. All fields have defaults, so
    ``ChatAgentConfig()`` is a valid configuration.
    """

    # LLM settings
    use_claude: bool = False
    use_chatgpt: bool = False
    claude_model: str = "claude-sonnet-4-20250514"
    base_url: str = "http://localhost:8000/api/v1"
    model_id: Optional[str] = None  # None = use default Qwen3-Coder-30B

    # Execution settings
    max_steps: int = 10
    streaming: bool = False  # Use --streaming to enable

    # Debug/output settings
    debug: bool = False
    debug_prompts: bool = False  # Backward compatibility
    show_prompts: bool = False
    show_stats: bool = False
    silent_mode: bool = False
    output_dir: Optional[str] = None

    # RAG settings
    # default_factory gives each config instance its own list.
    rag_documents: List[str] = field(default_factory=list)
    watch_directories: List[str] = field(default_factory=list)
    chunk_size: int = 500
    chunk_overlap: int = 100
    max_chunks: int = 5
    use_llm_chunking: bool = False  # Use fast heuristic-based chunking by default

    # Security
    # None makes ChatAgent fall back to the current working directory.
    allowed_paths: Optional[List[str]] = None
63
+
64
+
65
+ class ChatAgent(
66
+ Agent, RAGToolsMixin, FileToolsMixin, ShellToolsMixin, FileSearchToolsMixin
67
+ ):
68
+ """
69
+ Chat Agent with RAG, file operations, and shell command capabilities.
70
+
71
+ This agent provides:
72
+ - Document Q&A using RAG
73
+ - File search and operations
74
+ - Shell command execution
75
+ - Auto-indexing when files change
76
+ - Interactive chat interface
77
+ - Session persistence with auto-save
78
+ - MCP server integration
79
+ """
80
+
81
def __init__(self, config: Optional[ChatAgentConfig] = None):
    """
    Initialize Chat Agent.

    Sets up path validation, the optional RAG SDK, file-watcher bookkeeping
    and session state, then calls the base ``Agent`` constructor. Afterwards
    it indexes the initial documents and starts the directory watchers.

    Args:
        config: ChatAgentConfig object with all settings. If None, uses defaults.
    """
    # Use provided config or create default
    if config is None:
        config = ChatAgentConfig()

    # Initialize path validator
    self.path_validator = PathValidator(config.allowed_paths)

    # Store config for access in other methods
    self.config = config

    # Now use config for all initialization
    # Store RAG configuration from config
    self.rag_documents = config.rag_documents
    self.watch_directories = config.watch_directories
    self.chunk_size = config.chunk_size
    self.max_chunks = config.max_chunks

    # Security: Configure allowed paths for file operations
    # If None, allow current directory and subdirectories
    if config.allowed_paths is None:
        self.allowed_paths = [Path.cwd()]
    else:
        self.allowed_paths = [Path(p).resolve() for p in config.allowed_paths]

    # Use Qwen3-Coder-30B by default for better JSON parsing (same as Jira agent)
    effective_model_id = config.model_id or "Qwen3-Coder-30B-A3B-Instruct-GGUF"

    # Debug logging for model selection
    logger.debug(
        f"Model selection: model_id={repr(config.model_id)}, effective={effective_model_id}"
    )

    # Store model for display
    self.model_display_name = effective_model_id

    # Store max_chunks for adaptive retrieval
    self.base_max_chunks = config.max_chunks

    # Initialize RAG SDK (optional - will be None if dependencies not installed)
    try:
        rag_config = RAGConfig(
            model=effective_model_id,
            chunk_size=config.chunk_size,
            chunk_overlap=config.chunk_overlap,  # Configurable overlap for context preservation
            max_chunks=config.max_chunks,
            show_stats=config.show_stats,
            use_local_llm=not (config.use_claude or config.use_chatgpt),
            use_llm_chunking=config.use_llm_chunking,  # Enable semantic chunking
            base_url=config.base_url,  # Pass base_url to RAG for VLM client
            allowed_paths=config.allowed_paths,  # Pass allowed paths to RAG SDK
        )
        self.rag = RAGSDK(rag_config)
    except ImportError as e:
        # RAG dependencies not installed - this is fine, RAG features will be disabled
        logger.debug(f"RAG dependencies not available: {e}")
        self.rag = None

    # File system monitoring
    self.observers = []
    self.file_handlers = []  # Track FileChangeHandler instances for telemetry
    self.indexed_files = set()

    # Session management
    self.session_manager = SessionManager()
    self.current_session = None
    self.conversation_history: List[Dict[str, str]] = (
        []
    )  # Track conversation for persistence

    # Call parent constructor
    # NOTE(review): the attributes above are set before this call, presumably
    # because the base constructor uses them (e.g. via _get_system_prompt,
    # which guards with hasattr(self, "rag")) - confirm before reordering.
    super().__init__(
        use_claude=config.use_claude,
        use_chatgpt=config.use_chatgpt,
        claude_model=config.claude_model,
        base_url=config.base_url,
        model_id=effective_model_id,  # Pass the effective model to parent
        max_steps=config.max_steps,
        debug_prompts=config.debug_prompts,
        show_prompts=config.show_prompts,
        output_dir=config.output_dir,
        streaming=config.streaming,
        show_stats=config.show_stats,
        silent_mode=config.silent_mode,
        debug=config.debug,
    )

    # Index initial documents (only if RAG is available)
    if self.rag_documents and self.rag:
        self._index_documents(self.rag_documents)
    elif self.rag_documents and not self.rag:
        logger.warning(
            "RAG dependencies not installed. Cannot index documents. "
            'Install with: uv pip install -e ".[rag]"'
        )

    # Start watching directories
    if self.watch_directories:
        self._start_watching()
186
+
187
+ def _post_process_tool_result(
188
+ self, tool_name: str, _tool_args: Dict[str, Any], tool_result: Dict[str, Any]
189
+ ) -> None:
190
+ """
191
+ Post-process tool results for Chat Agent.
192
+
193
+ Handles RAG-specific debug information display.
194
+
195
+ Args:
196
+ tool_name: Name of the tool that was executed
197
+ _tool_args: Arguments that were passed to the tool (unused)
198
+ tool_result: Result returned by the tool
199
+ """
200
+ # Handle RAG query debug information
201
+ if (
202
+ tool_name
203
+ in ["query_documents", "query_specific_file", "search_indexed_chunks"]
204
+ and isinstance(tool_result, dict)
205
+ and "debug_info" in tool_result
206
+ and self.debug
207
+ ):
208
+ debug_info = tool_result.get("debug_info")
209
+ print("[DEBUG] RAG Query Debug Info:")
210
+ print(f" - Search keys: {debug_info.get('search_keys', [])}")
211
+ print(
212
+ f" - Total chunks found: {debug_info.get('total_chunks_before_dedup', 0)}"
213
+ )
214
+ print(
215
+ f" - After deduplication: {debug_info.get('total_chunks_after_dedup', 0)}"
216
+ )
217
+ print(
218
+ f" - Final chunks returned: {debug_info.get('final_chunks_returned', 0)}"
219
+ )
220
+
221
def _get_system_prompt(self) -> str:
    """Generate the system prompt for the Chat Agent.

    The prompt is a short role statement, a section listing the currently
    indexed documents (or saying that none are indexed), and a fixed block
    of tool-usage guidance with worked examples.

    Returns:
        The assembled prompt text. The tool list itself is not included here.
    """
    # Get list of indexed documents
    indexed_docs_section = ""
    # hasattr guard: ``rag`` may not be set yet when this method runs.
    if hasattr(self, "rag") and self.rag and self.rag.indexed_files:
        doc_names = []
        for file_path in self.rag.indexed_files:
            # Only the file name is shown in the prompt, not the full path.
            doc_names.append(Path(file_path).name)

        # chr(10) is a newline: one "- name" bullet per document, sorted by name.
        indexed_docs_section = f"""
**CURRENTLY INDEXED DOCUMENTS:**
You have {len(doc_names)} document(s) already indexed and ready to search:
{chr(10).join(f'- {name}' for name in sorted(doc_names))}

When the user asks a question about content, you can DIRECTLY search these documents using query_documents or query_specific_file.
You do NOT need to check what's indexed first - this list is always up-to-date.
"""
    else:
        indexed_docs_section = """
**CURRENTLY INDEXED DOCUMENTS:**
No documents are currently indexed.

**IMPORTANT: When no documents are indexed, act as a normal conversational AI assistant.**
- Answer general questions using your knowledge
- Have natural conversations with the user
- Do NOT try to search for documents unless the user explicitly asks to index/search files
- Do NOT use query_documents or query_specific_file when no documents are indexed
- Only use RAG tools when the user explicitly asks to index documents or search their files
"""

    # Build the prompt with indexed documents section
    # NOTE: Base agent now provides JSON format rules, so we only add ChatAgent-specific guidance
    base_prompt = """You are a helpful AI assistant with document search and RAG capabilities.
"""

    # Add indexed documents section
    # The literal below is a plain (non-f) string, so its braces are emitted verbatim.
    prompt = base_prompt + indexed_docs_section + """
**WHEN TO USE TOOLS VS DIRECT ANSWERS:**

Use Format 1 (answer) for:
- Greetings: {"answer": "Hello! How can I help?"}
- Thanks: {"answer": "You're welcome!"}
- **General knowledge questions**: {"answer": "Kalin is a name of Slavic origin meaning..."}
- **Conversation and chat**: {"answer": "That's interesting! Tell me more about..."}
- Out-of-scope: {"answer": "I don't have weather data..."}
- **FINAL ANSWERS after retrieving data**: {"answer": "According to the document, the vision is..."}

**IMPORTANT: If no documents are indexed, answer ALL questions using general knowledge!**

Use Format 2 (tool) ONLY when:
- User explicitly asks to search/index files OR documents are already indexed
- "what files are indexed?" → {"tool": "list_indexed_documents", "tool_args": {}}
- "search for X" → {"tool": "query_documents", "tool_args": {"query": "X"}}
- "what does doc say?" {"tool": "query_specific_file", "tool_args": {...}}
- "find the oil and gas manual" → {"tool": "search_file", "tool_args": {"file_pattern": "oil and gas manual"}}
- "index my data folder" → {"tool": "search_directory", "tool_args": {"directory_name": "data"}}
- "index files in /path/to/dir" → {"tool": "index_directory", "tool_args": {"directory_path": "/path/to/dir"}}

**CRITICAL: NEVER make up or guess user data. Always use tools.**

**SMART DISCOVERY WORKFLOW:**

When user asks a domain-specific question (e.g., "what is the vision of the oil & gas regulator?"):
1. Check if relevant documents are indexed
2. If NO relevant documents found:
a. Extract key terms from question (e.g., "oil", "gas", "regulator")
b. Search for files using search_file with those terms
c. If files found, index them automatically
d. Provide status update: "Found and indexed X file(s)"
e. Then query to answer the question
3. If documents already indexed, query directly

Example Smart Discovery:
User: "what is the vision of the oil & gas regulator?"
You: {"tool": "list_indexed_documents", "tool_args": {}}
Result: {"documents": [], "count": 0}
You: {"tool": "search_file", "tool_args": {"file_pattern": "oil gas"}}
Result: {"files": ["/docs/Oil-Gas-Manual.pdf"], "count": 1}
You: {"tool": "index_document", "tool_args": {"file_path": "/docs/Oil-Gas-Manual.pdf"}}
Result: {"status": "success", "chunks": 150}
You: {"thought": "Document indexed, now searching for vision", "tool": "query_specific_file", "tool_args": {"file_path": "/docs/Oil-Gas-Manual.pdf", "query": "vision of the oil gas regulator"}}
Result: {"chunks": ["The vision is to be recognized..."], "scores": [0.92]}
You: {"answer": "According to the Oil & Gas Manual, the vision is to be recognized..."}

**CONTEXT INFERENCE RULE:**

When user asks a question without specifying which document:
1. Check the "CURRENTLY INDEXED DOCUMENTS" section above - you already know what's indexed!
2. If EXACTLY 1 document indexed → **IMMEDIATELY search it**: {"tool": "query_documents", "tool_args": {"query": "..."}}
3. If 0 documents Use Smart Discovery workflow to find and index relevant files
4. If multiple documents → Search all with query_documents OR ask which specific one: {"answer": "Which document? You have: [list]"}

**AVAILABLE TOOLS:**
The complete list of available tools with their descriptions is provided below in the AVAILABLE TOOLS section.
Tools are grouped by category: RAG tools, File System tools, Shell tools, etc.

**FILE SEARCH AND AUTO-INDEX WORKFLOW:**
When user asks "find the X manual" or "find X document on my drive":
1. Use search_file (automatically searches all drives intelligently):
- Phase 1: Searches common locations (Documents, Downloads, Desktop) - FAST
- Phase 2: If not found, deep search entire drive(s) - THOROUGH
- Filters by document file types (.pdf, .docx, .txt, etc.)
2. Handle results:
- **If 1 file found**: Automatically index it
- **If multiple files found**: Display numbered list, ask user to select
- **If none found**: Inform user
3. After indexing, confirm and let user know they can ask questions

**IMPORTANT: Always show tool results with display_message!**
Tools like search_file return a 'display_message' field - ALWAYS show this to the user:

Example:
Tool result: {"display_message": "✓ Found 2 file(s) in current directory (gaia)", "file_list": [...]}
You must say: {"answer": "✓ Found 2 file(s) in current directory (gaia):\n1. Oil-Gas-Manual.pdf\n..."}

NOTE: Progress indicators (spinners) are shown automatically by the tool while searching.
You don't need to say "searching..." - the tool displays it live!

Example (Single file):
User: "Can you find the oil and gas manual on my drive?"
You: {"tool": "search_file", "tool_args": {"file_pattern": "oil gas"}}
Result: {"files": [...], "count": 1, "display_message": "🔍 Found 1 matching file(s)", "file_list": [{"number": 1, "name": "Oil-Gas-Manual.pdf", "directory": "C:/Users/user/Documents"}]}
You: {"answer": "🔍 Searching for 'oil gas'... Found 1 file:\n• Oil-Gas-Manual.pdf (Documents folder)\n\nIndexing now..."}
You: {"tool": "index_document", "tool_args": {"file_path": "C:/Users/user/Documents/Oil-Gas-Manual.pdf"}}
You: {"answer": "✓ Indexed Oil-Gas-Manual.pdf (150 chunks). You can now ask me questions about it!"}

Example (Multiple files):
User: "Find the manual on my drive"
You: {"answer": "🔍 Searching your drive for 'manual'..."}
You: {"tool": "search_file", "tool_args": {"file_pattern": "manual"}}
Result: {"count": 3, "file_list": [{"number": 1, "name": "Oil-Gas-Manual.pdf", "directory": "C:/Docs"}, {"number": 2, "name": "Safety-Manual.pdf", "directory": "C:/Downloads"}]}
You: {"answer": "Found 3 matching files:\n\n1. Oil-Gas-Manual.pdf (C:/Docs/)\n2. Safety-Manual.pdf (C:/Downloads/)\n3. Training-Manual.pdf (C:/Work/)\n\nWhich one would you like me to index? (enter the number)"}
User: "1"
You: {"tool": "index_document", "tool_args": {"file_path": "C:/Docs/Oil-Gas-Manual.pdf"}}
You: {"answer": "✓ Indexed Oil-Gas-Manual.pdf. You can now ask questions about it!"}

**DIRECTORY INDEXING WORKFLOW:**
When user asks to "index my data folder" or similar:
1. Use search_directory to find matching directories
2. Show user the matches and ask which one (if multiple)
3. Use index_directory on the chosen path
4. Report indexing results"""

    return prompt
365
+
366
def _create_console(self):
    """Return the console used to render this agent's output.

    In silent mode a ``SilentConsole`` is returned that still prints the
    final answer; otherwise a regular ``AgentConsole`` is used.
    """
    from gaia.agents.base.console import SilentConsole

    if not self.silent_mode:
        return AgentConsole()

    # Even in silent mode the user must see the response, so the final
    # answer is never silenced for the chat agent.
    return SilentConsole(silence_final_answer=False)
375
+
376
def _generate_search_keys(self, query: str) -> List[str]:
    """
    Generate search keys from query for better retrieval.

    Produces the original query, a keyword-only variant (stop words and
    very short words removed), and two reformulations for "what is ..."
    and "how to ..." questions.

    Punctuation is stripped before the stop-word and length filters run,
    so tokens such as "is?" or "the," are dropped like their bare forms.
    Reformulations need the prefix to be followed by a space and a
    non-empty topic.

    Args:
        query: User query

    Returns:
        List of search keys/queries; the first element is always ``query``.
    """
    keys = [query]  # Always include original query

    # Common words that carry no retrieval signal.
    stop_words = {
        "what", "how", "when", "where", "who", "why",
        "is", "are", "was", "were",
        "the", "a", "an", "and", "or", "but",
        "in", "on", "at", "to", "for", "of", "with", "by", "from", "about",
        "can", "could", "would", "should",
        "do", "does", "did",
        "tell", "me", "you",
    }

    lowered = query.lower()

    # Strip punctuation first so the filters see the bare word.
    stripped_words = (word.strip("?,.:;!") for word in lowered.split())
    keywords = [
        word for word in stripped_words if len(word) > 2 and word not in stop_words
    ]

    # Add keyword-based query (only if different from original)
    if keywords:
        keyword_query = " ".join(keywords)
        if keyword_query != query:  # Avoid duplicates
            keys.append(keyword_query)

    # Add question reformulations for common patterns. The trailing space in
    # each prefix keeps "what isotope ..." from matching "what is".
    reformulations = (
        ("what is ", ("definition", "explanation")),
        ("how to ", ("steps", "guide")),
    )
    for prefix, suffixes in reformulations:
        if not lowered.startswith(prefix):
            continue
        topic = query[len(prefix):].strip("?").strip()
        if topic:
            keys.extend(f"{topic} {suffix}" for suffix in suffixes)
        break

    logger.debug(f"Generated search keys: {keys}")
    return keys
455
+
456
+ def _is_path_allowed(self, path: str) -> bool:
457
+ """
458
+ Check if a path is within allowed directories.
459
+ Uses PathValidator for the actual check.
460
+
461
+ Args:
462
+ path: Path to validate
463
+
464
+ Returns:
465
+ True if path is allowed, False otherwise
466
+ """
467
+ return self.path_validator.is_path_allowed(path, prompt_user=False)
468
+
469
+ def _validate_and_open_file(self, file_path: str, mode: str = "r"):
470
+ """
471
+ Safely open a file with path validation using O_NOFOLLOW to prevent TOCTOU attacks.
472
+
473
+ This method prevents Time-of-Check-Time-of-Use vulnerabilities by:
474
+ 1. Using O_NOFOLLOW flag to reject symlinks
475
+ 2. Opening file with low-level os.open() before validation
476
+ 3. Validating the opened file descriptor, not the path
477
+
478
+ Args:
479
+ file_path: Path to the file
480
+ mode: File open mode ('r', 'w', 'rb', 'wb', etc.)
481
+
482
+ Returns:
483
+ File handle if successful
484
+
485
+ Raises:
486
+ PermissionError: If path is not allowed or is a symlink
487
+ IOError: If file cannot be opened
488
+ """
489
+ import stat
490
+
491
+ try:
492
+ # Determine open flags based on mode
493
+ if "r" in mode and "+" not in mode:
494
+ flags = os.O_RDONLY
495
+ elif "w" in mode:
496
+ flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
497
+ elif "a" in mode:
498
+ flags = os.O_WRONLY | os.O_CREAT | os.O_APPEND
499
+ elif "+" in mode:
500
+ flags = os.O_RDWR
501
+ else:
502
+ flags = os.O_RDONLY
503
+
504
+ # CRITICAL: Add O_NOFOLLOW to reject symlinks
505
+ # This prevents TOCTOU attacks where symlinks are swapped
506
+ if hasattr(os, "O_NOFOLLOW"):
507
+ flags |= os.O_NOFOLLOW
508
+
509
+ # Open the file at low level (doesn't follow symlinks with O_NOFOLLOW)
510
+ try:
511
+ fd = os.open(file_path, flags)
512
+ except OSError as e:
513
+ if e.errno == 40: # ELOOP - too many symbolic links
514
+ raise PermissionError(f"Symlinks not allowed: {file_path}")
515
+ raise IOError(f"Cannot open file {file_path}: {e}")
516
+
517
+ # Get the real path of the opened file descriptor
518
+ # On Linux, we can use /proc/self/fd/
519
+ # On other systems, use fstat
520
+ try:
521
+ file_stat = os.fstat(fd)
522
+
523
+ # Verify it's a regular file, not a directory or special file
524
+ if not stat.S_ISREG(file_stat.st_mode):
525
+ os.close(fd)
526
+ raise PermissionError(f"Not a regular file: {file_path}")
527
+
528
+ # Get the real path (Linux-specific, but works on most Unix)
529
+ if os.path.exists(f"/proc/self/fd/{fd}"):
530
+ real_path = Path(os.readlink(f"/proc/self/fd/{fd}")).resolve()
531
+ else:
532
+ # Fallback for non-Linux systems
533
+ real_path = Path(file_path).resolve()
534
+
535
+ # Validate the real path is within allowed directories
536
+ path_allowed = False
537
+ for allowed_path in self.allowed_paths:
538
+ try:
539
+ real_path.relative_to(allowed_path)
540
+ path_allowed = True
541
+ break
542
+ except ValueError:
543
+ continue
544
+
545
+ if not path_allowed:
546
+ os.close(fd)
547
+ raise PermissionError(
548
+ f"Access denied to path: {real_path}\n"
549
+ f"Requested: {file_path}\n"
550
+ f"Resolved to path outside allowed directories"
551
+ )
552
+
553
+ # Convert file descriptor to Python file object
554
+ if "b" in mode:
555
+ return os.fdopen(fd, mode)
556
+ else:
557
+ return os.fdopen(fd, mode, encoding="utf-8")
558
+
559
+ except Exception:
560
+ os.close(fd)
561
+ raise
562
+
563
+ except PermissionError:
564
+ raise
565
+ except Exception as e:
566
+ raise IOError(f"Failed to securely open file {file_path}: {e}")
567
+
568
+ def _auto_save_session(self) -> None:
569
+ """Auto-save current session (called after important operations)."""
570
+ try:
571
+ if self.current_session:
572
+ self.save_current_session()
573
+ if self.debug:
574
+ logger.debug(
575
+ f"Auto-saved session: {self.current_session.session_id}"
576
+ )
577
+ except Exception as e:
578
+ logger.warning(f"Auto-save failed: {e}")
579
+
580
def _register_tools(self) -> None:
    """Register chat agent tools from mixins.

    This method only triggers registration; the actual tool definitions
    are in the mixin classes:
      - RAGToolsMixin (rag_tools.py): RAG and document indexing tools
      - FileToolsMixin (file_tools.py): Directory monitoring
      - ShellToolsMixin (shell_tools.py): Shell command execution
      - FileSearchToolsMixin (shared): File and directory search across drives
    """
    registrar_names = (
        "register_rag_tools",
        "register_file_tools",
        "register_shell_tools",
        "register_file_search_tools",  # Shared file search tools
    )

    # Resolve and invoke one registrar at a time so that the call order,
    # and the point of failure if one is missing, stays strictly sequential.
    for registrar_name in registrar_names:
        getattr(self, registrar_name)()
593
+
594
def _index_documents(self, documents: List[str]) -> None:
    """Index the initial set of documents, then refresh the system prompt.

    Each path is handled independently: a missing file is logged as a
    warning, an unsuccessful indexing result or an exception is logged as
    an error, and processing continues with the next path. Only paths whose
    indexing result reports success are added to ``self.indexed_files``.

    Args:
        documents: Paths of the documents to index.
    """
    for path in documents:
        try:
            if not os.path.exists(path):
                logger.warning(f"Document not found: {path}")
                continue

            logger.info(f"Indexing document: {path}")
            outcome = self.rag.index_document(path)

            if not outcome.get("success"):
                reason = outcome.get("error", "Unknown error")
                logger.error(f"Failed to index {path}: {reason}")
                continue

            self.indexed_files.add(path)
            logger.info(
                f"Successfully indexed: {path} ({outcome.get('num_chunks', 0)} chunks)"
            )
        except Exception as e:
            logger.error(f"Failed to index {path}: {e}")

    # Update system prompt after indexing to include the new documents
    self.update_system_prompt()
617
+
618
def update_system_prompt(self) -> None:
    """Regenerate ``self.system_prompt`` and append the tools section.

    The base prompt comes from ``self._get_system_prompt()``; the tool
    listing from ``self._format_tools_for_prompt()`` is appended under an
    "AVAILABLE TOOLS" banner. When RAG is available, a debug message with
    the number of indexed documents is logged.
    """
    # Start from a freshly generated base prompt.
    self.system_prompt = self._get_system_prompt()

    # Append the formatted tool listing under its banner.
    tools_block = self._format_tools_for_prompt()
    self.system_prompt += f"\n\n==== AVAILABLE TOOLS ====\n{tools_block}\n\n"

    if not self.rag:
        return

    indexed_count = len(self.rag.indexed_files)
    logger.debug(f"Updated system prompt with {indexed_count} indexed documents")
631
+
632
def _start_watching(self) -> None:
    """Begin watching every directory listed in ``self.watch_directories``.

    Each entry is handed to ``self._watch_directory`` in list order.
    """
    for watch_target in self.watch_directories:
        self._watch_directory(watch_target)
636
+
637
def _watch_directory(self, directory: str) -> None:
    """Start a recursive file-system watcher on ``directory``.

    File events are routed to this agent: created and modified files go to
    ``reindex_file``, deletions to ``_handle_file_deletion`` and moves to
    ``_handle_file_move``. A successfully started observer is appended to
    ``self.observers``.

    Args:
        directory: Path of the directory to watch.

    Raises:
        ImportError: If ``check_watchdog_available()`` reports that the
            watchdog package is missing. Any other failure while creating
            or starting the observer is logged and not raised.
    """
    if not check_watchdog_available():
        message = (
            "\n❌ Error: Missing required package 'watchdog'\n\n"
            "File watching requires the watchdog package.\n"
            "Please install the required dependencies:\n"
            ' uv pip install -e ".[dev]"\n\n'
            "Or install watchdog directly:\n"
            ' uv pip install "watchdog>=2.1.0"\n'
        )
        logger.error(message)
        raise ImportError(message)

    try:
        # Use generic FileChangeHandler with callbacks
        callbacks = {
            "on_created": self.reindex_file,
            "on_modified": self.reindex_file,
            "on_deleted": self._handle_file_deletion,
            "on_moved": self._handle_file_move,
        }
        handler = FileChangeHandler(**callbacks)

        watcher = Observer()
        watcher.schedule(handler, directory, recursive=True)
        watcher.start()

        # Only observers that actually started are tracked for shutdown.
        self.observers.append(watcher)
        logger.info(f"Started watching: {directory}")
    except Exception as e:
        logger.error(f"Failed to watch {directory}: {e}")
666
+
667
def _handle_file_deletion(self, file_path: str) -> None:
    """React to a deleted file by dropping it from the RAG index.

    Does nothing when RAG is unavailable or when the file's absolute path
    is not present in ``self.indexed_files``. The path is removed from
    ``self.indexed_files`` only if ``self.rag.remove_document`` reports
    success. Exceptions are logged and never propagated.

    Args:
        file_path: Path of the file that was deleted.
    """
    if not self.rag:
        return

    try:
        # Tracked entries are looked up by absolute path.
        absolute_path = str(Path(file_path).absolute())
        if absolute_path not in self.indexed_files:
            return

        logger.info(f"File deleted, removing from index: {file_path}")
        if not self.rag.remove_document(absolute_path):
            logger.warning(f"Failed to remove deleted file from index: {file_path}")
            return

        self.indexed_files.discard(absolute_path)
        logger.info(f"Successfully removed deleted file from index: {file_path}")
    except Exception as e:
        logger.error(f"Error handling file deletion {file_path}: {e}")
687
+
688
def _handle_file_move(self, src_path: str, dest_path: str) -> None:
    """Treat a move as a deletion of the source plus a (re)index of the target.

    Args:
        src_path: Path the file was moved from.
        dest_path: Path the file was moved to.
    """
    # Drop the stale entry first, then pick the file up at its new location.
    self._handle_file_deletion(src_path)
    self.reindex_file(dest_path)
692
+
693
def reindex_file(self, file_path: str) -> None:
    """Re-index a file that was created or modified.

    The path is resolved with ``os.path.realpath`` before the allow-list
    check and before it is passed to ``self.rag.reindex_document``. The
    call is skipped with a warning when RAG is unavailable or the resolved
    path is not allowed. Indexing failures are logged, never raised.

    Args:
        file_path: Path of the file to re-index.
    """
    if not self.rag:
        logger.warning(f"Cannot reindex {file_path}: RAG dependencies not installed")
        return

    # Resolve to real path for consistent validation
    resolved_path = os.path.realpath(file_path)

    # Security check
    if not self._is_path_allowed(resolved_path):
        logger.warning(f"Re-indexing skipped: Path not allowed {resolved_path}")
        return

    try:
        logger.info(f"Reindexing: {resolved_path}")
        # reindex_document removes old chunks before indexing again
        outcome = self.rag.reindex_document(resolved_path)

        if not outcome.get("success"):
            reason = outcome.get("error", "Unknown error")
            logger.error(f"Failed to reindex {resolved_path}: {reason}")
            return

        # NOTE(review): the caller-supplied path, not the resolved one, is
        # what gets tracked here; confirm this is the intended key.
        self.indexed_files.add(file_path)
        logger.info(f"Successfully reindexed {resolved_path}")
    except Exception as e:
        logger.error(f"Failed to reindex {resolved_path}: {e}")
721
+
722
def stop_watching(self) -> None:
    """Stop every tracked file-system observer and forget them.

    Each observer is stopped and then joined, in list order; afterwards
    ``self.observers`` is emptied in place.
    """
    active_observers = self.observers
    for watcher in active_observers:
        watcher.stop()
        watcher.join()
    active_observers.clear()
728
+
729
def load_session(self, session_id: str) -> bool:
    """
    Load a saved session and restore its state onto this agent.

    Restores, in order: the indexed documents (when RAG is available),
    the watched directories, and the conversation history.

    A document is recorded in ``self.indexed_files`` only when
    ``index_document`` reports success, matching how the other indexing
    paths in this class treat the result. A missing file-watching
    dependency (``ImportError`` from ``_watch_directory``) does not abort
    the load: the directories are still remembered and the conversation
    history is still restored, so a later save cannot overwrite the loaded
    session with stale history.

    Args:
        session_id: Session ID to load

    Returns:
        True if the session was loaded, False if it was not found or an
        unexpected error occurred (the error is logged).
    """
    try:
        session = self.session_manager.load_session(session_id)
        if not session:
            logger.error(f"Session not found: {session_id}")
            return False

        self.current_session = session

        # Restore indexed documents (only if RAG is available)
        if self.rag:
            for doc_path in session.indexed_documents:
                if not os.path.exists(doc_path):
                    continue
                try:
                    result = self.rag.index_document(doc_path)
                    if result.get("success"):
                        self.indexed_files.add(doc_path)
                    else:
                        # Do not claim a document is indexed when the
                        # indexer reported a failure.
                        error = result.get("error", "Unknown error")
                        logger.warning(f"Failed to reindex {doc_path}: {error}")
                except Exception as e:
                    logger.warning(f"Failed to reindex {doc_path}: {e}")
        elif session.indexed_documents:
            logger.warning(
                f"Cannot restore {len(session.indexed_documents)} indexed documents: "
                "RAG dependencies not installed"
            )

        # Restore watched directories
        watcher_available = True
        for dir_path in session.watched_directories:
            if not os.path.exists(dir_path) or dir_path in self.watch_directories:
                continue
            self.watch_directories.append(dir_path)
            if not watcher_available:
                # Already known to be unavailable; keep the directory on
                # record without retrying (and re-logging) for each entry.
                continue
            try:
                self._watch_directory(dir_path)
            except ImportError:
                watcher_available = False
                logger.warning(
                    "File watching unavailable; restored directories will not be monitored"
                )

        # Restore conversation history
        self.conversation_history = list(session.chat_history)

        logger.info(
            f"Loaded session {session_id}: {len(session.indexed_documents)} docs, {len(session.chat_history)} messages"
        )
        return True

    except Exception as e:
        logger.error(f"Error loading session: {e}")
        return False
779
+
780
def save_current_session(self) -> bool:
    """
    Save the current session, creating one first if none exists.

    Copies of the agent's indexed files, watched directories and
    conversation history are stored on the session before it is handed
    to the session manager.

    Returns:
        The session manager's save result, or False if an exception
        occurred (the error is logged).
    """
    try:
        session = self.current_session
        if not session:
            # Create new session
            session = self.session_manager.create_session()
            self.current_session = session

        # Snapshot the agent state onto the session.
        session.indexed_documents = [*self.indexed_files]
        session.watched_directories = [*self.watch_directories]
        session.chat_history = [*self.conversation_history]

        return self.session_manager.save_session(session)

    except Exception as e:
        logger.error(f"Error saving session: {e}")
        return False
803
+
804
def __del__(self):
    """Stop the file watchers when the agent is destroyed.

    Any exception raised by ``stop_watching`` is logged rather than
    propagated out of the finalizer.
    """
    try:
        self.stop_watching()
    except Exception as cleanup_error:
        logger.error(
            f"Error stopping file watchers during cleanup: {cleanup_error}"
        )