amd-gaia 0.15.0__py3-none-any.whl → 0.15.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.1.dist-info}/METADATA +223 -223
  2. amd_gaia-0.15.1.dist-info/RECORD +178 -0
  3. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.1.dist-info}/entry_points.txt +1 -0
  4. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.1.dist-info}/licenses/LICENSE.md +20 -20
  5. gaia/__init__.py +29 -29
  6. gaia/agents/__init__.py +19 -19
  7. gaia/agents/base/__init__.py +9 -9
  8. gaia/agents/base/agent.py +2177 -2177
  9. gaia/agents/base/api_agent.py +120 -120
  10. gaia/agents/base/console.py +1841 -1841
  11. gaia/agents/base/errors.py +237 -237
  12. gaia/agents/base/mcp_agent.py +86 -86
  13. gaia/agents/base/tools.py +83 -83
  14. gaia/agents/blender/agent.py +556 -556
  15. gaia/agents/blender/agent_simple.py +133 -135
  16. gaia/agents/blender/app.py +211 -211
  17. gaia/agents/blender/app_simple.py +41 -41
  18. gaia/agents/blender/core/__init__.py +16 -16
  19. gaia/agents/blender/core/materials.py +506 -506
  20. gaia/agents/blender/core/objects.py +316 -316
  21. gaia/agents/blender/core/rendering.py +225 -225
  22. gaia/agents/blender/core/scene.py +220 -220
  23. gaia/agents/blender/core/view.py +146 -146
  24. gaia/agents/chat/__init__.py +9 -9
  25. gaia/agents/chat/agent.py +835 -835
  26. gaia/agents/chat/app.py +1058 -1058
  27. gaia/agents/chat/session.py +508 -508
  28. gaia/agents/chat/tools/__init__.py +15 -15
  29. gaia/agents/chat/tools/file_tools.py +96 -96
  30. gaia/agents/chat/tools/rag_tools.py +1729 -1729
  31. gaia/agents/chat/tools/shell_tools.py +436 -436
  32. gaia/agents/code/__init__.py +7 -7
  33. gaia/agents/code/agent.py +549 -549
  34. gaia/agents/code/cli.py +377 -0
  35. gaia/agents/code/models.py +135 -135
  36. gaia/agents/code/orchestration/__init__.py +24 -24
  37. gaia/agents/code/orchestration/checklist_executor.py +1763 -1763
  38. gaia/agents/code/orchestration/checklist_generator.py +713 -713
  39. gaia/agents/code/orchestration/factories/__init__.py +9 -9
  40. gaia/agents/code/orchestration/factories/base.py +63 -63
  41. gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -118
  42. gaia/agents/code/orchestration/factories/python_factory.py +106 -106
  43. gaia/agents/code/orchestration/orchestrator.py +841 -841
  44. gaia/agents/code/orchestration/project_analyzer.py +391 -391
  45. gaia/agents/code/orchestration/steps/__init__.py +67 -67
  46. gaia/agents/code/orchestration/steps/base.py +188 -188
  47. gaia/agents/code/orchestration/steps/error_handler.py +314 -314
  48. gaia/agents/code/orchestration/steps/nextjs.py +828 -828
  49. gaia/agents/code/orchestration/steps/python.py +307 -307
  50. gaia/agents/code/orchestration/template_catalog.py +469 -469
  51. gaia/agents/code/orchestration/workflows/__init__.py +14 -14
  52. gaia/agents/code/orchestration/workflows/base.py +80 -80
  53. gaia/agents/code/orchestration/workflows/nextjs.py +186 -186
  54. gaia/agents/code/orchestration/workflows/python.py +94 -94
  55. gaia/agents/code/prompts/__init__.py +11 -11
  56. gaia/agents/code/prompts/base_prompt.py +77 -77
  57. gaia/agents/code/prompts/code_patterns.py +2036 -2036
  58. gaia/agents/code/prompts/nextjs_prompt.py +40 -40
  59. gaia/agents/code/prompts/python_prompt.py +109 -109
  60. gaia/agents/code/schema_inference.py +365 -365
  61. gaia/agents/code/system_prompt.py +41 -41
  62. gaia/agents/code/tools/__init__.py +42 -42
  63. gaia/agents/code/tools/cli_tools.py +1138 -1138
  64. gaia/agents/code/tools/code_formatting.py +319 -319
  65. gaia/agents/code/tools/code_tools.py +769 -769
  66. gaia/agents/code/tools/error_fixing.py +1347 -1347
  67. gaia/agents/code/tools/external_tools.py +180 -180
  68. gaia/agents/code/tools/file_io.py +845 -845
  69. gaia/agents/code/tools/prisma_tools.py +190 -190
  70. gaia/agents/code/tools/project_management.py +1016 -1016
  71. gaia/agents/code/tools/testing.py +321 -321
  72. gaia/agents/code/tools/typescript_tools.py +122 -122
  73. gaia/agents/code/tools/validation_parsing.py +461 -461
  74. gaia/agents/code/tools/validation_tools.py +806 -806
  75. gaia/agents/code/tools/web_dev_tools.py +1758 -1758
  76. gaia/agents/code/validators/__init__.py +16 -16
  77. gaia/agents/code/validators/antipattern_checker.py +241 -241
  78. gaia/agents/code/validators/ast_analyzer.py +197 -197
  79. gaia/agents/code/validators/requirements_validator.py +145 -145
  80. gaia/agents/code/validators/syntax_validator.py +171 -171
  81. gaia/agents/docker/__init__.py +7 -7
  82. gaia/agents/docker/agent.py +642 -642
  83. gaia/agents/emr/__init__.py +8 -8
  84. gaia/agents/emr/agent.py +1506 -1506
  85. gaia/agents/emr/cli.py +1322 -1322
  86. gaia/agents/emr/constants.py +475 -475
  87. gaia/agents/emr/dashboard/__init__.py +4 -4
  88. gaia/agents/emr/dashboard/server.py +1974 -1974
  89. gaia/agents/jira/__init__.py +11 -11
  90. gaia/agents/jira/agent.py +894 -894
  91. gaia/agents/jira/jql_templates.py +299 -299
  92. gaia/agents/routing/__init__.py +7 -7
  93. gaia/agents/routing/agent.py +567 -570
  94. gaia/agents/routing/system_prompt.py +75 -75
  95. gaia/agents/summarize/__init__.py +11 -0
  96. gaia/agents/summarize/agent.py +885 -0
  97. gaia/agents/summarize/prompts.py +129 -0
  98. gaia/api/__init__.py +23 -23
  99. gaia/api/agent_registry.py +238 -238
  100. gaia/api/app.py +305 -305
  101. gaia/api/openai_server.py +575 -575
  102. gaia/api/schemas.py +186 -186
  103. gaia/api/sse_handler.py +373 -373
  104. gaia/apps/__init__.py +4 -4
  105. gaia/apps/llm/__init__.py +6 -6
  106. gaia/apps/llm/app.py +173 -169
  107. gaia/apps/summarize/app.py +116 -633
  108. gaia/apps/summarize/html_viewer.py +133 -133
  109. gaia/apps/summarize/pdf_formatter.py +284 -284
  110. gaia/audio/__init__.py +2 -2
  111. gaia/audio/audio_client.py +439 -439
  112. gaia/audio/audio_recorder.py +269 -269
  113. gaia/audio/kokoro_tts.py +599 -599
  114. gaia/audio/whisper_asr.py +432 -432
  115. gaia/chat/__init__.py +16 -16
  116. gaia/chat/app.py +430 -430
  117. gaia/chat/prompts.py +522 -522
  118. gaia/chat/sdk.py +1228 -1225
  119. gaia/cli.py +5481 -5632
  120. gaia/database/__init__.py +10 -10
  121. gaia/database/agent.py +176 -176
  122. gaia/database/mixin.py +290 -290
  123. gaia/database/testing.py +64 -64
  124. gaia/eval/batch_experiment.py +2332 -2332
  125. gaia/eval/claude.py +542 -542
  126. gaia/eval/config.py +37 -37
  127. gaia/eval/email_generator.py +512 -512
  128. gaia/eval/eval.py +3179 -3179
  129. gaia/eval/groundtruth.py +1130 -1130
  130. gaia/eval/transcript_generator.py +582 -582
  131. gaia/eval/webapp/README.md +167 -167
  132. gaia/eval/webapp/package-lock.json +875 -875
  133. gaia/eval/webapp/package.json +20 -20
  134. gaia/eval/webapp/public/app.js +3402 -3402
  135. gaia/eval/webapp/public/index.html +87 -87
  136. gaia/eval/webapp/public/styles.css +3661 -3661
  137. gaia/eval/webapp/server.js +415 -415
  138. gaia/eval/webapp/test-setup.js +72 -72
  139. gaia/llm/__init__.py +9 -2
  140. gaia/llm/base_client.py +60 -0
  141. gaia/llm/exceptions.py +12 -0
  142. gaia/llm/factory.py +70 -0
  143. gaia/llm/lemonade_client.py +3236 -3221
  144. gaia/llm/lemonade_manager.py +294 -294
  145. gaia/llm/providers/__init__.py +9 -0
  146. gaia/llm/providers/claude.py +108 -0
  147. gaia/llm/providers/lemonade.py +120 -0
  148. gaia/llm/providers/openai_provider.py +79 -0
  149. gaia/llm/vlm_client.py +382 -382
  150. gaia/logger.py +189 -189
  151. gaia/mcp/agent_mcp_server.py +245 -245
  152. gaia/mcp/blender_mcp_client.py +138 -138
  153. gaia/mcp/blender_mcp_server.py +648 -648
  154. gaia/mcp/context7_cache.py +332 -332
  155. gaia/mcp/external_services.py +518 -518
  156. gaia/mcp/mcp_bridge.py +811 -550
  157. gaia/mcp/servers/__init__.py +6 -6
  158. gaia/mcp/servers/docker_mcp.py +83 -83
  159. gaia/perf_analysis.py +361 -0
  160. gaia/rag/__init__.py +10 -10
  161. gaia/rag/app.py +293 -293
  162. gaia/rag/demo.py +304 -304
  163. gaia/rag/pdf_utils.py +235 -235
  164. gaia/rag/sdk.py +2194 -2194
  165. gaia/security.py +163 -163
  166. gaia/talk/app.py +289 -289
  167. gaia/talk/sdk.py +538 -538
  168. gaia/testing/__init__.py +87 -87
  169. gaia/testing/assertions.py +330 -330
  170. gaia/testing/fixtures.py +333 -333
  171. gaia/testing/mocks.py +493 -493
  172. gaia/util.py +46 -46
  173. gaia/utils/__init__.py +33 -33
  174. gaia/utils/file_watcher.py +675 -675
  175. gaia/utils/parsing.py +223 -223
  176. gaia/version.py +100 -100
  177. amd_gaia-0.15.0.dist-info/RECORD +0 -168
  178. gaia/agents/code/app.py +0 -266
  179. gaia/llm/llm_client.py +0 -723
  180. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.1.dist-info}/WHEEL +0 -0
  181. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.1.dist-info}/top_level.txt +0 -0
gaia/agents/chat/agent.py CHANGED
@@ -1,835 +1,835 @@
1
- # Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
2
- # SPDX-License-Identifier: MIT
3
- """
4
- Chat Agent - Interactive chat with RAG and file search capabilities.
5
- """
6
-
7
- import os
8
- from dataclasses import dataclass, field
9
- from pathlib import Path
10
- from typing import Any, Dict, List, Optional
11
-
12
- try:
13
- from watchdog.observers import Observer
14
- except ImportError:
15
- Observer = None
16
-
17
- from gaia.agents.base.agent import Agent
18
- from gaia.agents.base.console import AgentConsole
19
- from gaia.agents.chat.session import SessionManager
20
- from gaia.agents.chat.tools import FileToolsMixin, RAGToolsMixin, ShellToolsMixin
21
- from gaia.agents.tools import FileSearchToolsMixin # Shared file search tools
22
- from gaia.logger import get_logger
23
- from gaia.rag.sdk import RAGSDK, RAGConfig
24
- from gaia.security import PathValidator
25
- from gaia.utils.file_watcher import FileChangeHandler, check_watchdog_available
26
-
27
- logger = get_logger(__name__)
28
-
29
-
30
- @dataclass
31
- class ChatAgentConfig:
32
- """Configuration for ChatAgent."""
33
-
34
- # LLM settings
35
- use_claude: bool = False
36
- use_chatgpt: bool = False
37
- claude_model: str = "claude-sonnet-4-20250514"
38
- base_url: str = "http://localhost:8000/api/v1"
39
- model_id: Optional[str] = None # None = use default Qwen3-Coder-30B
40
-
41
- # Execution settings
42
- max_steps: int = 10
43
- streaming: bool = False # Use --streaming to enable
44
-
45
- # Debug/output settings
46
- debug: bool = False
47
- debug_prompts: bool = False # Backward compatibility
48
- show_prompts: bool = False
49
- show_stats: bool = False
50
- silent_mode: bool = False
51
- output_dir: Optional[str] = None
52
-
53
- # RAG settings
54
- rag_documents: List[str] = field(default_factory=list)
55
- watch_directories: List[str] = field(default_factory=list)
56
- chunk_size: int = 500
57
- chunk_overlap: int = 100
58
- max_chunks: int = 5
59
- use_llm_chunking: bool = False # Use fast heuristic-based chunking by default
60
-
61
- # Security
62
- allowed_paths: Optional[List[str]] = None
63
-
64
-
65
- class ChatAgent(
66
- Agent, RAGToolsMixin, FileToolsMixin, ShellToolsMixin, FileSearchToolsMixin
67
- ):
68
- """
69
- Chat Agent with RAG, file operations, and shell command capabilities.
70
-
71
- This agent provides:
72
- - Document Q&A using RAG
73
- - File search and operations
74
- - Shell command execution
75
- - Auto-indexing when files change
76
- - Interactive chat interface
77
- - Session persistence with auto-save
78
- - MCP server integration
79
- """
80
-
81
- # Define simple tools that can execute without requiring a multi-step plan
82
- SIMPLE_TOOLS = [
83
- "list_indexed_documents",
84
- "rag_status",
85
- "query_documents",
86
- "query_specific_file",
87
- "search_indexed_chunks", # RAG: Search indexed document chunks
88
- "dump_document", # RAG: Export cached extracted text
89
- "search_file_content", # Shared: Grep-like disk search
90
- "search_file", # Shared: Find files by name
91
- "search_directory", # Shared: Find directories by name
92
- "read_file", # Shared: Read any file
93
- "write_file", # Shared: Write any file
94
- "index_directory", # RAG: Index directory
95
- "run_shell_command", # Shell: Execute commands
96
- ]
97
-
98
- def __init__(self, config: Optional[ChatAgentConfig] = None):
99
- """
100
- Initialize Chat Agent.
101
-
102
- Args:
103
- config: ChatAgentConfig object with all settings. If None, uses defaults.
104
- """
105
- # Use provided config or create default
106
- if config is None:
107
- config = ChatAgentConfig()
108
-
109
- # Initialize path validator
110
- self.path_validator = PathValidator(config.allowed_paths)
111
-
112
- # Now use config for all initialization
113
- # Store RAG configuration from config
114
- self.rag_documents = config.rag_documents
115
- self.watch_directories = config.watch_directories
116
- self.chunk_size = config.chunk_size
117
- self.max_chunks = config.max_chunks
118
-
119
- # Security: Configure allowed paths for file operations
120
- # If None, allow current directory and subdirectories
121
- if config.allowed_paths is None:
122
- self.allowed_paths = [Path.cwd()]
123
- else:
124
- self.allowed_paths = [Path(p).resolve() for p in config.allowed_paths]
125
-
126
- # Use Qwen3-Coder-30B by default for better JSON parsing (same as Jira agent)
127
- effective_model_id = config.model_id or "Qwen3-Coder-30B-A3B-Instruct-GGUF"
128
-
129
- # Debug logging for model selection
130
- logger.debug(
131
- f"Model selection: model_id={repr(config.model_id)}, effective={effective_model_id}"
132
- )
133
-
134
- # Store model for display
135
- self.model_display_name = effective_model_id
136
-
137
- # Store max_chunks for adaptive retrieval
138
- self.base_max_chunks = config.max_chunks
139
-
140
- # Initialize RAG SDK (optional - will be None if dependencies not installed)
141
- try:
142
- rag_config = RAGConfig(
143
- model=effective_model_id,
144
- chunk_size=config.chunk_size,
145
- chunk_overlap=config.chunk_overlap, # Configurable overlap for context preservation
146
- max_chunks=config.max_chunks,
147
- show_stats=config.show_stats,
148
- use_local_llm=not (config.use_claude or config.use_chatgpt),
149
- use_llm_chunking=config.use_llm_chunking, # Enable semantic chunking
150
- base_url=config.base_url, # Pass base_url to RAG for VLM client
151
- )
152
- self.rag = RAGSDK(rag_config)
153
- except ImportError as e:
154
- # RAG dependencies not installed - this is fine, RAG features will be disabled
155
- logger.debug(f"RAG dependencies not available: {e}")
156
- self.rag = None
157
-
158
- # File system monitoring
159
- self.observers = []
160
- self.file_handlers = [] # Track FileChangeHandler instances for telemetry
161
- self.indexed_files = set()
162
-
163
- # Session management
164
- self.session_manager = SessionManager()
165
- self.current_session = None
166
- self.conversation_history: List[Dict[str, str]] = (
167
- []
168
- ) # Track conversation for persistence
169
-
170
- # Call parent constructor
171
- super().__init__(
172
- use_claude=config.use_claude,
173
- use_chatgpt=config.use_chatgpt,
174
- claude_model=config.claude_model,
175
- base_url=config.base_url,
176
- model_id=effective_model_id, # Pass the effective model to parent
177
- max_steps=config.max_steps,
178
- debug_prompts=config.debug_prompts,
179
- show_prompts=config.show_prompts,
180
- output_dir=config.output_dir,
181
- streaming=config.streaming,
182
- show_stats=config.show_stats,
183
- silent_mode=config.silent_mode,
184
- debug=config.debug,
185
- )
186
-
187
- # Index initial documents (only if RAG is available)
188
- if self.rag_documents and self.rag:
189
- self._index_documents(self.rag_documents)
190
- elif self.rag_documents and not self.rag:
191
- logger.warning(
192
- "RAG dependencies not installed. Cannot index documents. "
193
- 'Install with: uv pip install -e ".[rag]"'
194
- )
195
-
196
- # Start watching directories
197
- if self.watch_directories:
198
- self._start_watching()
199
-
200
- def _post_process_tool_result(
201
- self, tool_name: str, _tool_args: Dict[str, Any], tool_result: Dict[str, Any]
202
- ) -> None:
203
- """
204
- Post-process tool results for Chat Agent.
205
-
206
- Handles RAG-specific debug information display.
207
-
208
- Args:
209
- tool_name: Name of the tool that was executed
210
- _tool_args: Arguments that were passed to the tool (unused)
211
- tool_result: Result returned by the tool
212
- """
213
- # Handle RAG query debug information
214
- if (
215
- tool_name
216
- in ["query_documents", "query_specific_file", "search_indexed_chunks"]
217
- and isinstance(tool_result, dict)
218
- and "debug_info" in tool_result
219
- and self.debug
220
- ):
221
- debug_info = tool_result.get("debug_info")
222
- print("[DEBUG] RAG Query Debug Info:")
223
- print(f" - Search keys: {debug_info.get('search_keys', [])}")
224
- print(
225
- f" - Total chunks found: {debug_info.get('total_chunks_before_dedup', 0)}"
226
- )
227
- print(
228
- f" - After deduplication: {debug_info.get('total_chunks_after_dedup', 0)}"
229
- )
230
- print(
231
- f" - Final chunks returned: {debug_info.get('final_chunks_returned', 0)}"
232
- )
233
-
234
- def _get_system_prompt(self) -> str:
235
- """Generate the system prompt for the Chat Agent."""
236
- # Get list of indexed documents
237
- indexed_docs_section = ""
238
- if hasattr(self, "rag") and self.rag and self.rag.indexed_files:
239
- doc_names = []
240
- for file_path in self.rag.indexed_files:
241
- doc_names.append(Path(file_path).name)
242
-
243
- indexed_docs_section = f"""
244
- **CURRENTLY INDEXED DOCUMENTS:**
245
- You have {len(doc_names)} document(s) already indexed and ready to search:
246
- {chr(10).join(f'- {name}' for name in sorted(doc_names))}
247
-
248
- When the user asks a question about content, you can DIRECTLY search these documents using query_documents or query_specific_file.
249
- You do NOT need to check what's indexed first - this list is always up-to-date.
250
- """
251
- else:
252
- indexed_docs_section = """
253
- **CURRENTLY INDEXED DOCUMENTS:**
254
- No documents are currently indexed.
255
-
256
- **IMPORTANT: When no documents are indexed, act as a normal conversational AI assistant.**
257
- - Answer general questions using your knowledge
258
- - Have natural conversations with the user
259
- - Do NOT try to search for documents unless the user explicitly asks to index/search files
260
- - Do NOT use query_documents or query_specific_file when no documents are indexed
261
- - Only use RAG tools when the user explicitly asks to index documents or search their files
262
- """
263
-
264
- # Build the prompt with indexed documents section
265
- # NOTE: Base agent now provides JSON format rules, so we only add ChatAgent-specific guidance
266
- base_prompt = """You are a helpful AI assistant with document search and RAG capabilities.
267
- """
268
-
269
- # Add indexed documents section
270
- prompt = (
271
- base_prompt
272
- + indexed_docs_section
273
- + """
274
- **WHEN TO USE TOOLS VS DIRECT ANSWERS:**
275
-
276
- Use Format 1 (answer) for:
277
- - Greetings: {"answer": "Hello! How can I help?"}
278
- - Thanks: {"answer": "You're welcome!"}
279
- - **General knowledge questions**: {"answer": "Kalin is a name of Slavic origin meaning..."}
280
- - **Conversation and chat**: {"answer": "That's interesting! Tell me more about..."}
281
- - Out-of-scope: {"answer": "I don't have weather data..."}
282
- - **FINAL ANSWERS after retrieving data**: {"answer": "According to the document, the vision is..."}
283
-
284
- **IMPORTANT: If no documents are indexed, answer ALL questions using general knowledge!**
285
-
286
- Use Format 2 (tool) ONLY when:
287
- - User explicitly asks to search/index files OR documents are already indexed
288
- - "what files are indexed?" → {"tool": "list_indexed_documents", "tool_args": {}}
289
- - "search for X" → {"tool": "query_documents", "tool_args": {"query": "X"}}
290
- - "what does doc say?" → {"tool": "query_specific_file", "tool_args": {...}}
291
- - "find the oil and gas manual" → {"tool": "search_file", "tool_args": {"file_pattern": "oil and gas manual"}}
292
- - "index my data folder" → {"tool": "search_directory", "tool_args": {"directory_name": "data"}}
293
- - "index files in /path/to/dir" → {"tool": "index_directory", "tool_args": {"directory_path": "/path/to/dir"}}
294
-
295
- **CRITICAL: NEVER make up or guess user data. Always use tools.**
296
-
297
- **SMART DISCOVERY WORKFLOW:**
298
-
299
- When user asks a domain-specific question (e.g., "what is the vision of the oil & gas regulator?"):
300
- 1. Check if relevant documents are indexed
301
- 2. If NO relevant documents found:
302
- a. Extract key terms from question (e.g., "oil", "gas", "regulator")
303
- b. Search for files using search_file with those terms
304
- c. If files found, index them automatically
305
- d. Provide status update: "Found and indexed X file(s)"
306
- e. Then query to answer the question
307
- 3. If documents already indexed, query directly
308
-
309
- Example Smart Discovery:
310
- User: "what is the vision of the oil & gas regulator?"
311
- You: {"tool": "list_indexed_documents", "tool_args": {}}
312
- Result: {"documents": [], "count": 0}
313
- You: {"tool": "search_file", "tool_args": {"file_pattern": "oil gas"}}
314
- Result: {"files": ["/docs/Oil-Gas-Manual.pdf"], "count": 1}
315
- You: {"tool": "index_document", "tool_args": {"file_path": "/docs/Oil-Gas-Manual.pdf"}}
316
- Result: {"status": "success", "chunks": 150}
317
- You: {"thought": "Document indexed, now searching for vision", "tool": "query_specific_file", "tool_args": {"file_path": "/docs/Oil-Gas-Manual.pdf", "query": "vision of the oil gas regulator"}}
318
- Result: {"chunks": ["The vision is to be recognized..."], "scores": [0.92]}
319
- You: {"answer": "According to the Oil & Gas Manual, the vision is to be recognized..."}
320
-
321
- **CONTEXT INFERENCE RULE:**
322
-
323
- When user asks a question without specifying which document:
324
- 1. Check the "CURRENTLY INDEXED DOCUMENTS" section above - you already know what's indexed!
325
- 2. If EXACTLY 1 document indexed → **IMMEDIATELY search it**: {"tool": "query_documents", "tool_args": {"query": "..."}}
326
- 3. If 0 documents → Use Smart Discovery workflow to find and index relevant files
327
- 4. If multiple documents → Search all with query_documents OR ask which specific one: {"answer": "Which document? You have: [list]"}
328
-
329
- **AVAILABLE TOOLS:**
330
- The complete list of available tools with their descriptions is provided below in the AVAILABLE TOOLS section.
331
- Tools are grouped by category: RAG tools, File System tools, Shell tools, etc.
332
-
333
- **FILE SEARCH AND AUTO-INDEX WORKFLOW:**
334
- When user asks "find the X manual" or "find X document on my drive":
335
- 1. Use search_file (automatically searches all drives intelligently):
336
- - Phase 1: Searches common locations (Documents, Downloads, Desktop) - FAST
337
- - Phase 2: If not found, deep search entire drive(s) - THOROUGH
338
- - Filters by document file types (.pdf, .docx, .txt, etc.)
339
- 2. Handle results:
340
- - **If 1 file found**: Automatically index it
341
- - **If multiple files found**: Display numbered list, ask user to select
342
- - **If none found**: Inform user
343
- 3. After indexing, confirm and let user know they can ask questions
344
-
345
- **IMPORTANT: Always show tool results with display_message!**
346
- Tools like search_file return a 'display_message' field - ALWAYS show this to the user:
347
-
348
- Example:
349
- Tool result: {"display_message": "✓ Found 2 file(s) in current directory (gaia)", "file_list": [...]}
350
- You must say: {"answer": "✓ Found 2 file(s) in current directory (gaia):\n1. Oil-Gas-Manual.pdf\n..."}
351
-
352
- NOTE: Progress indicators (spinners) are shown automatically by the tool while searching.
353
- You don't need to say "searching..." - the tool displays it live!
354
-
355
- Example (Single file):
356
- User: "Can you find the oil and gas manual on my drive?"
357
- You: {"tool": "search_file", "tool_args": {"file_pattern": "oil gas"}}
358
- Result: {"files": [...], "count": 1, "display_message": "🔍 Found 1 matching file(s)", "file_list": [{"number": 1, "name": "Oil-Gas-Manual.pdf", "directory": "C:/Users/user/Documents"}]}
359
- You: {"answer": "🔍 Searching for 'oil gas'... Found 1 file:\n• Oil-Gas-Manual.pdf (Documents folder)\n\nIndexing now..."}
360
- You: {"tool": "index_document", "tool_args": {"file_path": "C:/Users/user/Documents/Oil-Gas-Manual.pdf"}}
361
- You: {"answer": "✓ Indexed Oil-Gas-Manual.pdf (150 chunks). You can now ask me questions about it!"}
362
-
363
- Example (Multiple files):
364
- User: "Find the manual on my drive"
365
- You: {"answer": "🔍 Searching your drive for 'manual'..."}
366
- You: {"tool": "search_file", "tool_args": {"file_pattern": "manual"}}
367
- Result: {"count": 3, "file_list": [{"number": 1, "name": "Oil-Gas-Manual.pdf", "directory": "C:/Docs"}, {"number": 2, "name": "Safety-Manual.pdf", "directory": "C:/Downloads"}]}
368
- You: {"answer": "Found 3 matching files:\n\n1. Oil-Gas-Manual.pdf (C:/Docs/)\n2. Safety-Manual.pdf (C:/Downloads/)\n3. Training-Manual.pdf (C:/Work/)\n\nWhich one would you like me to index? (enter the number)"}
369
- User: "1"
370
- You: {"tool": "index_document", "tool_args": {"file_path": "C:/Docs/Oil-Gas-Manual.pdf"}}
371
- You: {"answer": "✓ Indexed Oil-Gas-Manual.pdf. You can now ask questions about it!"}
372
-
373
- **DIRECTORY INDEXING WORKFLOW:**
374
- When user asks to "index my data folder" or similar:
375
- 1. Use search_directory to find matching directories
376
- 2. Show user the matches and ask which one (if multiple)
377
- 3. Use index_directory on the chosen path
378
- 4. Report indexing results"""
379
- )
380
-
381
- return prompt
382
-
383
- def _create_console(self):
384
- """Create console for chat agent."""
385
- from gaia.agents.base.console import SilentConsole
386
-
387
- if self.silent_mode:
388
- # For chat agent, we ALWAYS want to show the final answer
389
- # Even in silent mode, the user needs to see the response
390
- return SilentConsole(silence_final_answer=False)
391
- return AgentConsole()
392
-
393
- def _generate_search_keys(self, query: str) -> List[str]:
394
- """
395
- Generate search keys from query for better retrieval.
396
- Extracts keywords and reformulates query for improved matching.
397
-
398
- Args:
399
- query: User query
400
-
401
- Returns:
402
- List of search keys/queries
403
- """
404
- keys = [query] # Always include original query
405
-
406
- # Extract potential keywords (simple approach)
407
- # Remove common words and extract meaningful terms
408
- stop_words = {
409
- "what",
410
- "how",
411
- "when",
412
- "where",
413
- "who",
414
- "why",
415
- "is",
416
- "are",
417
- "was",
418
- "were",
419
- "the",
420
- "a",
421
- "an",
422
- "and",
423
- "or",
424
- "but",
425
- "in",
426
- "on",
427
- "at",
428
- "to",
429
- "for",
430
- "of",
431
- "with",
432
- "by",
433
- "from",
434
- "about",
435
- "can",
436
- "could",
437
- "would",
438
- "should",
439
- "do",
440
- "does",
441
- "did",
442
- "tell",
443
- "me",
444
- "you",
445
- }
446
-
447
- words = query.lower().split()
448
- keywords = [
449
- w.strip("?,.:;!")
450
- for w in words
451
- if w.lower() not in stop_words and len(w) > 2
452
- ]
453
-
454
- # Add keyword-based query (only if different from original)
455
- if keywords:
456
- keyword_query = " ".join(keywords)
457
- if keyword_query != query: # Avoid duplicates
458
- keys.append(keyword_query)
459
-
460
- # Add question reformulations for common patterns
461
- if query.lower().startswith("what is"):
462
- topic = query[8:].strip("?").strip()
463
- keys.append(f"{topic} definition")
464
- keys.append(f"{topic} explanation")
465
- elif query.lower().startswith("how to"):
466
- topic = query[7:].strip("?").strip()
467
- keys.append(f"{topic} steps")
468
- keys.append(f"{topic} guide")
469
-
470
- logger.debug(f"Generated search keys: {keys}")
471
- return keys
472
-
473
- def _is_path_allowed(self, path: str) -> bool:
474
- """
475
- Check if a path is within allowed directories.
476
- Uses real path resolution to prevent TOCTOU attacks.
477
-
478
- Args:
479
- path: Path to validate
480
-
481
- Returns:
482
- True if path is allowed, False otherwise
483
- """
484
- try:
485
- # Resolve path using os.path.realpath to follow symlinks
486
- # This prevents TOCTOU attacks by resolving at check time
487
- real_path = Path(os.path.realpath(path)).resolve()
488
-
489
- # Check if real path is within any allowed directory
490
- for allowed_path in self.allowed_paths:
491
- try:
492
- # is_relative_to requires Python 3.9+, use alternative for compatibility
493
- real_path.relative_to(allowed_path)
494
- return True
495
- except ValueError:
496
- continue
497
-
498
- return False
499
- except Exception as e:
500
- logger.error(f"Error validating path {path}: {e}")
501
- return False
502
-
503
- def _validate_and_open_file(self, file_path: str, mode: str = "r"):
504
- """
505
- Safely open a file with path validation using O_NOFOLLOW to prevent TOCTOU attacks.
506
-
507
- This method prevents Time-of-Check-Time-of-Use vulnerabilities by:
508
- 1. Using O_NOFOLLOW flag to reject symlinks
509
- 2. Opening file with low-level os.open() before validation
510
- 3. Validating the opened file descriptor, not the path
511
-
512
- Args:
513
- file_path: Path to the file
514
- mode: File open mode ('r', 'w', 'rb', 'wb', etc.)
515
-
516
- Returns:
517
- File handle if successful
518
-
519
- Raises:
520
- PermissionError: If path is not allowed or is a symlink
521
- IOError: If file cannot be opened
522
- """
523
- import stat
524
-
525
- try:
526
- # Determine open flags based on mode
527
- if "r" in mode and "+" not in mode:
528
- flags = os.O_RDONLY
529
- elif "w" in mode:
530
- flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
531
- elif "a" in mode:
532
- flags = os.O_WRONLY | os.O_CREAT | os.O_APPEND
533
- elif "+" in mode:
534
- flags = os.O_RDWR
535
- else:
536
- flags = os.O_RDONLY
537
-
538
- # CRITICAL: Add O_NOFOLLOW to reject symlinks
539
- # This prevents TOCTOU attacks where symlinks are swapped
540
- if hasattr(os, "O_NOFOLLOW"):
541
- flags |= os.O_NOFOLLOW
542
-
543
- # Open the file at low level (doesn't follow symlinks with O_NOFOLLOW)
544
- try:
545
- fd = os.open(file_path, flags)
546
- except OSError as e:
547
- if e.errno == 40: # ELOOP - too many symbolic links
548
- raise PermissionError(f"Symlinks not allowed: {file_path}")
549
- raise IOError(f"Cannot open file {file_path}: {e}")
550
-
551
- # Get the real path of the opened file descriptor
552
- # On Linux, we can use /proc/self/fd/
553
- # On other systems, use fstat
554
- try:
555
- file_stat = os.fstat(fd)
556
-
557
- # Verify it's a regular file, not a directory or special file
558
- if not stat.S_ISREG(file_stat.st_mode):
559
- os.close(fd)
560
- raise PermissionError(f"Not a regular file: {file_path}")
561
-
562
- # Get the real path (Linux-specific, but works on most Unix)
563
- if os.path.exists(f"/proc/self/fd/{fd}"):
564
- real_path = Path(os.readlink(f"/proc/self/fd/{fd}")).resolve()
565
- else:
566
- # Fallback for non-Linux systems
567
- real_path = Path(file_path).resolve()
568
-
569
- # Validate the real path is within allowed directories
570
- path_allowed = False
571
- for allowed_path in self.allowed_paths:
572
- try:
573
- real_path.relative_to(allowed_path)
574
- path_allowed = True
575
- break
576
- except ValueError:
577
- continue
578
-
579
- if not path_allowed:
580
- os.close(fd)
581
- raise PermissionError(
582
- f"Access denied to path: {real_path}\n"
583
- f"Requested: {file_path}\n"
584
- f"Resolved to path outside allowed directories"
585
- )
586
-
587
- # Convert file descriptor to Python file object
588
- if "b" in mode:
589
- return os.fdopen(fd, mode)
590
- else:
591
- return os.fdopen(fd, mode, encoding="utf-8")
592
-
593
- except Exception:
594
- os.close(fd)
595
- raise
596
-
597
- except PermissionError:
598
- raise
599
- except Exception as e:
600
- raise IOError(f"Failed to securely open file {file_path}: {e}")
601
-
602
- def _auto_save_session(self) -> None:
603
- """Auto-save current session (called after important operations)."""
604
- try:
605
- if self.current_session:
606
- self.save_current_session()
607
- if self.debug:
608
- logger.debug(
609
- f"Auto-saved session: {self.current_session.session_id}"
610
- )
611
- except Exception as e:
612
- logger.warning(f"Auto-save failed: {e}")
613
-
614
- def _register_tools(self) -> None:
615
- """Register chat agent tools from mixins."""
616
- # Register tools from mixins
617
- self.register_rag_tools()
618
- self.register_file_tools()
619
- self.register_shell_tools()
620
- self.register_file_search_tools() # Shared file search tools
621
-
622
- # NOTE: The actual tool definitions are in the mixin classes:
623
- # - RAGToolsMixin (rag_tools.py): RAG and document indexing tools
624
- # - FileToolsMixin (file_tools.py): Directory monitoring
625
- # - ShellToolsMixin (shell_tools.py): Shell command execution
626
- # - FileSearchToolsMixin (shared): File and directory search across drives
627
-
628
- def _index_documents(self, documents: List[str]) -> None:
629
- """Index initial documents."""
630
- for doc in documents:
631
- try:
632
- if os.path.exists(doc):
633
- logger.info(f"Indexing document: {doc}")
634
- result = self.rag.index_document(doc)
635
-
636
- if result.get("success"):
637
- self.indexed_files.add(doc)
638
- logger.info(
639
- f"Successfully indexed: {doc} ({result.get('num_chunks', 0)} chunks)"
640
- )
641
- else:
642
- error = result.get("error", "Unknown error")
643
- logger.error(f"Failed to index {doc}: {error}")
644
- else:
645
- logger.warning(f"Document not found: {doc}")
646
- except Exception as e:
647
- logger.error(f"Failed to index {doc}: {e}")
648
-
649
- # Update system prompt after indexing to include the new documents
650
- self._update_system_prompt()
651
-
652
- def _update_system_prompt(self) -> None:
653
- """Update the system prompt with current indexed documents."""
654
- # Regenerate the system prompt with updated document list
655
- self.system_prompt = self._get_system_prompt()
656
-
657
- # Add the tools description using the parent class method
658
- tools_description = self._format_tools_for_prompt()
659
- self.system_prompt += f"\n\n==== AVAILABLE TOOLS ====\n{tools_description}\n\n"
660
-
661
- if self.rag:
662
- logger.debug(
663
- f"Updated system prompt with {len(self.rag.indexed_files)} indexed documents"
664
- )
665
-
666
- def _start_watching(self) -> None:
667
- """Start watching directories for changes."""
668
- for directory in self.watch_directories:
669
- self._watch_directory(directory)
670
-
671
- def _watch_directory(self, directory: str) -> None:
672
- """Start a recursive watchdog observer on `directory`; raises ImportError when watchdog is unavailable, and only logs any other failure."""
673
- if not check_watchdog_available():
674
- error_msg = (
675
- "\n❌ Error: Missing required package 'watchdog'\n\n"
676
- "File watching requires the watchdog package.\n"
677
- "Please install the required dependencies:\n"
678
- ' uv pip install -e ".[dev]"\n\n'
679
- "Or install watchdog directly:\n"
680
- ' uv pip install "watchdog>=2.1.0"\n'
681
- )
682
- logger.error(error_msg)
683
- raise ImportError(error_msg)
684
-
685
- try:
686
- # Created/modified files go to reindex_file; deletions and moves have their own handlers
687
- event_handler = FileChangeHandler(
688
- on_created=self.reindex_file,
689
- on_modified=self.reindex_file,
690
- on_deleted=self._handle_file_deletion,
691
- on_moved=self._handle_file_move,
692
- )
693
- observer = Observer()
694
- observer.schedule(event_handler, directory, recursive=True)
695
- observer.start()
696
- self.observers.append(observer)
697
- logger.info(f"Started watching: {directory}")
698
- except Exception as e:
699
- logger.error(f"Failed to watch {directory}: {e}")
700
-
701
- def _handle_file_deletion(self, file_path: str) -> None:
702
- """Handle file deletion by removing it from the index."""
703
- if not self.rag:
704
- return
705
-
706
- try:
707
- file_abs_path = str(Path(file_path).absolute())
708
- if file_abs_path in self.indexed_files:
709
- logger.info(f"File deleted, removing from index: {file_path}")
710
- if self.rag.remove_document(file_abs_path):
711
- self.indexed_files.discard(file_abs_path)
712
- logger.info(
713
- f"Successfully removed deleted file from index: {file_path}"
714
- )
715
- else:
716
- logger.warning(
717
- f"Failed to remove deleted file from index: {file_path}"
718
- )
719
- except Exception as e:
720
- logger.error(f"Error handling file deletion {file_path}: {e}")
721
-
722
- def _handle_file_move(self, src_path: str, dest_path: str) -> None:
723
- """Handle file move by removing old path and indexing new path."""
724
- self._handle_file_deletion(src_path)
725
- self.reindex_file(dest_path)
726
-
727
def reindex_file(self, file_path: str) -> None:
    """Re-index a file that was created or modified.

    Delegates to ``self.rag.reindex_document`` and, on success, records the
    path in ``self.indexed_files``. Failures are logged, never raised. When
    RAG is unavailable only a warning is emitted.

    Args:
        file_path: Path of the file to re-index.
    """
    if not self.rag:
        logger.warning(f"Cannot reindex {file_path}: RAG dependencies not installed")
        return

    try:
        logger.info(f"Reindexing: {file_path}")
        # reindex_document drops the file's old chunks before indexing again.
        outcome = self.rag.reindex_document(file_path)
        if not outcome.get("success"):
            error = outcome.get("error", "Unknown error")
            logger.error(f"Failed to reindex {file_path}: {error}")
            return

        self.indexed_files.add(file_path)
        logger.info(f"Successfully reindexed {file_path}")
    except Exception as e:
        logger.error(f"Failed to reindex {file_path}: {e}")
747
-
748
def stop_watching(self) -> None:
    """Stop every file system observer and forget them.

    All observers are signalled to stop first and only then joined, so they
    shut down concurrently instead of one after another. Waiting on each
    observer right after stopping it made total shutdown time the sum of the
    individual shutdown times.
    """
    observers = list(self.observers)

    # Phase 1: ask every observer to stop.
    for observer in observers:
        observer.stop()

    # Phase 2: wait for each one to finish.
    for observer in observers:
        observer.join()

    self.observers.clear()
754
-
755
def load_session(self, session_id: str) -> bool:
    """
    Load a saved session and restore its documents, watchers and history.

    A document is recorded in ``self.indexed_files`` only when
    ``index_document`` reports success, matching ``_index_documents``.
    Previously every existing document was recorded even when indexing
    returned a failure result.

    Args:
        session_id: Session ID to load

    Returns:
        True if successful, False if the session is missing or loading failed
    """
    try:
        session = self.session_manager.load_session(session_id)
        if not session:
            logger.error(f"Session not found: {session_id}")
            return False

        self.current_session = session

        # Restore indexed documents (only if RAG is available)
        if self.rag:
            for doc_path in session.indexed_documents:
                if not os.path.exists(doc_path):
                    continue
                try:
                    result = self.rag.index_document(doc_path)
                    if result.get("success"):
                        self.indexed_files.add(doc_path)
                    else:
                        error = result.get("error", "Unknown error")
                        logger.warning(f"Failed to reindex {doc_path}: {error}")
                except Exception as e:
                    # One bad document must not abort the whole restore.
                    logger.warning(f"Failed to reindex {doc_path}: {e}")
        elif session.indexed_documents:
            logger.warning(
                f"Cannot restore {len(session.indexed_documents)} indexed documents: "
                "RAG dependencies not installed"
            )

        # Restore watched directories that still exist and are not watched yet
        for dir_path in session.watched_directories:
            if os.path.exists(dir_path) and dir_path not in self.watch_directories:
                self.watch_directories.append(dir_path)
                self._watch_directory(dir_path)

        # Restore conversation history (copied so the session keeps its own list)
        self.conversation_history = list(session.chat_history)

        logger.info(
            f"Loaded session {session_id}: {len(session.indexed_documents)} docs, {len(session.chat_history)} messages"
        )
        return True

    except Exception as e:
        logger.error(f"Error loading session: {e}")
        return False
805
-
806
def save_current_session(self) -> bool:
    """
    Snapshot the agent state into the current session and save it.

    A new session is created through the session manager when none is
    active. Indexed files, watched directories and conversation history are
    copied into the session before saving.

    Returns:
        The session manager's save result, or False if anything raised
    """
    try:
        session = self.current_session
        if not session:
            # No active session yet: start one and keep it as current.
            session = self.session_manager.create_session()
            self.current_session = session

        # Copies, so later changes on the agent do not alter the saved session.
        session.indexed_documents = list(self.indexed_files)
        session.watched_directories = list(self.watch_directories)
        session.chat_history = list(self.conversation_history)

        return self.session_manager.save_session(session)

    except Exception as e:
        logger.error(f"Error saving session: {e}")
        return False
829
-
830
def __del__(self):
    """Stop file watchers when the agent is garbage-collected.

    ``__init__`` runs several steps that can fail before ``self.observers``
    is assigned. In that case, and when no observer was ever started, there
    is nothing to stop, so the method returns quietly instead of logging a
    misleading cleanup error.
    """
    if not getattr(self, "observers", None):
        return

    try:
        self.stop_watching()
    except Exception as e:
        # A finalizer must never raise; report and move on.
        logger.error(f"Error stopping file watchers during cleanup: {e}")
1
+ # Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
2
+ # SPDX-License-Identifier: MIT
3
+ """
4
+ Chat Agent - Interactive chat with RAG and file search capabilities.
5
+ """
6
+
7
+ import os
8
+ from dataclasses import dataclass, field
9
+ from pathlib import Path
10
+ from typing import Any, Dict, List, Optional
11
+
12
+ try:
13
+ from watchdog.observers import Observer
14
+ except ImportError:
15
+ Observer = None
16
+
17
+ from gaia.agents.base.agent import Agent
18
+ from gaia.agents.base.console import AgentConsole
19
+ from gaia.agents.chat.session import SessionManager
20
+ from gaia.agents.chat.tools import FileToolsMixin, RAGToolsMixin, ShellToolsMixin
21
+ from gaia.agents.tools import FileSearchToolsMixin # Shared file search tools
22
+ from gaia.logger import get_logger
23
+ from gaia.rag.sdk import RAGSDK, RAGConfig
24
+ from gaia.security import PathValidator
25
+ from gaia.utils.file_watcher import FileChangeHandler, check_watchdog_available
26
+
27
+ logger = get_logger(__name__)
28
+
29
+
30
@dataclass
class ChatAgentConfig:
    """Settings bundle passed to ``ChatAgent``.

    Fields are grouped by concern: LLM backend, execution limits,
    debug/output switches, RAG indexing, and file-access security. Every
    field has a default, so ``ChatAgentConfig()`` is a usable configuration.
    """

    # ---- LLM backend -------------------------------------------------
    use_claude: bool = False
    use_chatgpt: bool = False
    claude_model: str = "claude-sonnet-4-20250514"
    base_url: str = "http://localhost:8000/api/v1"
    # None selects the default model (Qwen3-Coder-30B).
    model_id: Optional[str] = None

    # ---- Execution ---------------------------------------------------
    max_steps: int = 10
    # Off by default; enabled with --streaming.
    streaming: bool = False

    # ---- Debug / output ----------------------------------------------
    debug: bool = False
    # Kept for backward compatibility.
    debug_prompts: bool = False
    show_prompts: bool = False
    show_stats: bool = False
    silent_mode: bool = False
    output_dir: Optional[str] = None

    # ---- RAG ---------------------------------------------------------
    rag_documents: List[str] = field(default_factory=list)
    watch_directories: List[str] = field(default_factory=list)
    chunk_size: int = 500
    chunk_overlap: int = 100
    max_chunks: int = 5
    # False keeps the fast heuristic-based chunking.
    use_llm_chunking: bool = False

    # ---- Security ----------------------------------------------------
    allowed_paths: Optional[List[str]] = None
63
+
64
+
65
+ class ChatAgent(
66
+ Agent, RAGToolsMixin, FileToolsMixin, ShellToolsMixin, FileSearchToolsMixin
67
+ ):
68
+ """
69
+ Chat Agent with RAG, file operations, and shell command capabilities.
70
+
71
+ This agent provides:
72
+ - Document Q&A using RAG
73
+ - File search and operations
74
+ - Shell command execution
75
+ - Auto-indexing when files change
76
+ - Interactive chat interface
77
+ - Session persistence with auto-save
78
+ - MCP server integration
79
+ """
80
+
81
+ # Define simple tools that can execute without requiring a multi-step plan
82
+ SIMPLE_TOOLS = [
83
+ "list_indexed_documents",
84
+ "rag_status",
85
+ "query_documents",
86
+ "query_specific_file",
87
+ "search_indexed_chunks", # RAG: Search indexed document chunks
88
+ "dump_document", # RAG: Export cached extracted text
89
+ "search_file_content", # Shared: Grep-like disk search
90
+ "search_file", # Shared: Find files by name
91
+ "search_directory", # Shared: Find directories by name
92
+ "read_file", # Shared: Read any file
93
+ "write_file", # Shared: Write any file
94
+ "index_directory", # RAG: Index directory
95
+ "run_shell_command", # Shell: Execute commands
96
+ ]
97
+
98
def __init__(self, config: Optional[ChatAgentConfig] = None):
    """
    Initialize Chat Agent.

    The list settings are copied from ``config`` rather than aliased:
    ``load_session`` appends to ``self.watch_directories``, and that must not
    modify the caller's config object (or another agent sharing it).

    Args:
        config: ChatAgentConfig object with all settings. If None, uses defaults.
    """
    # Use provided config or create default
    if config is None:
        config = ChatAgentConfig()

    # Initialize path validator
    self.path_validator = PathValidator(config.allowed_paths)

    # Store RAG configuration from config (lists are copied, see docstring)
    self.rag_documents = list(config.rag_documents)
    self.watch_directories = list(config.watch_directories)
    self.chunk_size = config.chunk_size
    self.max_chunks = config.max_chunks

    # Security: Configure allowed paths for file operations.
    # If None, allow current directory and subdirectories. Both branches
    # resolve the paths, because the path checks compare against fully
    # resolved candidates.
    if config.allowed_paths is None:
        self.allowed_paths = [Path.cwd().resolve()]
    else:
        self.allowed_paths = [Path(p).resolve() for p in config.allowed_paths]

    # Use Qwen3-Coder-30B by default for better JSON parsing (same as Jira agent)
    effective_model_id = config.model_id or "Qwen3-Coder-30B-A3B-Instruct-GGUF"

    # Debug logging for model selection
    logger.debug(
        f"Model selection: model_id={repr(config.model_id)}, effective={effective_model_id}"
    )

    # Store model for display
    self.model_display_name = effective_model_id

    # Store max_chunks for adaptive retrieval
    self.base_max_chunks = config.max_chunks

    # Initialize RAG SDK (optional - will be None if dependencies not installed)
    try:
        rag_config = RAGConfig(
            model=effective_model_id,
            chunk_size=config.chunk_size,
            chunk_overlap=config.chunk_overlap,  # Configurable overlap for context preservation
            max_chunks=config.max_chunks,
            show_stats=config.show_stats,
            use_local_llm=not (config.use_claude or config.use_chatgpt),
            use_llm_chunking=config.use_llm_chunking,  # Enable semantic chunking
            base_url=config.base_url,  # Pass base_url to RAG for VLM client
        )
        self.rag = RAGSDK(rag_config)
    except ImportError as e:
        # RAG dependencies not installed - this is fine, RAG features will be disabled
        logger.debug(f"RAG dependencies not available: {e}")
        self.rag = None

    # File system monitoring
    self.observers = []
    self.file_handlers = []  # Track FileChangeHandler instances for telemetry
    self.indexed_files = set()

    # Session management
    self.session_manager = SessionManager()
    self.current_session = None
    # Track conversation for persistence
    self.conversation_history: List[Dict[str, str]] = []

    # Call parent constructor
    super().__init__(
        use_claude=config.use_claude,
        use_chatgpt=config.use_chatgpt,
        claude_model=config.claude_model,
        base_url=config.base_url,
        model_id=effective_model_id,  # Pass the effective model to parent
        max_steps=config.max_steps,
        debug_prompts=config.debug_prompts,
        show_prompts=config.show_prompts,
        output_dir=config.output_dir,
        streaming=config.streaming,
        show_stats=config.show_stats,
        silent_mode=config.silent_mode,
        debug=config.debug,
    )

    # Index initial documents (only if RAG is available)
    if self.rag_documents and self.rag:
        self._index_documents(self.rag_documents)
    elif self.rag_documents and not self.rag:
        logger.warning(
            "RAG dependencies not installed. Cannot index documents. "
            'Install with: uv pip install -e ".[rag]"'
        )

    # Start watching directories
    if self.watch_directories:
        self._start_watching()
199
+
200
+ def _post_process_tool_result(
201
+ self, tool_name: str, _tool_args: Dict[str, Any], tool_result: Dict[str, Any]
202
+ ) -> None:
203
+ """
204
+ Print RAG debug details to stdout after a query tool has run (debug mode only).
205
+
206
+ Acts only when tool_name is query_documents, query_specific_file or search_indexed_chunks, tool_result is a dict containing "debug_info", and self.debug is truthy; otherwise it does nothing.
207
+
208
+ Args:
209
+ tool_name: Name of the tool that was executed
210
+ _tool_args: Arguments that were passed to the tool (unused)
211
+ tool_result: Result returned by the tool; its "debug_info" entry is read with .get(), so that entry is expected to be a dict
212
+ """
213
+ # All four conditions must hold; the tool-name test is evaluated first
214
+ if (
215
+ tool_name
216
+ in ["query_documents", "query_specific_file", "search_indexed_chunks"]
217
+ and isinstance(tool_result, dict)
218
+ and "debug_info" in tool_result
219
+ and self.debug
220
+ ):
221
+ debug_info = tool_result.get("debug_info")
222
+ print("[DEBUG] RAG Query Debug Info:")
223
+ print(f" - Search keys: {debug_info.get('search_keys', [])}")
224
+ print(
225
+ f" - Total chunks found: {debug_info.get('total_chunks_before_dedup', 0)}"
226
+ )
227
+ print(
228
+ f" - After deduplication: {debug_info.get('total_chunks_after_dedup', 0)}"
229
+ )
230
+ print(
231
+ f" - Final chunks returned: {debug_info.get('final_chunks_returned', 0)}"
232
+ )
233
+
234
+ def _get_system_prompt(self) -> str:
234
+ """Build the system prompt: a fixed intro, a section listing the indexed document names (or a no-documents notice), then static tool-usage guidance."""
235
+ # Section listing file names from self.rag.indexed_files; hasattr() tolerates self.rag not being set (presumably an early call during construction - confirm)
236
+ indexed_docs_section = ""
237
+ if hasattr(self, "rag") and self.rag and self.rag.indexed_files:
238
+ doc_names = []
239
+ for file_path in self.rag.indexed_files:
240
+ doc_names.append(Path(file_path).name)
241
+
242
+ indexed_docs_section = f"""
243
+ **CURRENTLY INDEXED DOCUMENTS:**
244
+ You have {len(doc_names)} document(s) already indexed and ready to search:
245
+ {chr(10).join(f'- {name}' for name in sorted(doc_names))}
246
+
247
+ When the user asks a question about content, you can DIRECTLY search these documents using query_documents or query_specific_file.
248
+ You do NOT need to check what's indexed first - this list is always up-to-date.
249
+ """
250
+ else:
251
+ indexed_docs_section = """
252
+ **CURRENTLY INDEXED DOCUMENTS:**
253
+ No documents are currently indexed.
254
+
255
+ **IMPORTANT: When no documents are indexed, act as a normal conversational AI assistant.**
256
+ - Answer general questions using your knowledge
257
+ - Have natural conversations with the user
258
+ - Do NOT try to search for documents unless the user explicitly asks to index/search files
259
+ - Do NOT use query_documents or query_specific_file when no documents are indexed
260
+ - Only use RAG tools when the user explicitly asks to index documents or search their files
261
+ """
262
+
263
+ # Assemble the prompt: fixed intro + indexed-documents section + static guidance
264
+ # NOTE: Base agent now provides JSON format rules, so we only add ChatAgent-specific guidance
265
+ base_prompt = """You are a helpful AI assistant with document search and RAG capabilities.
266
+ """
267
+
268
+ # The guidance below is a plain (non-f) string, so the JSON braces in its examples are literal
269
+ prompt = (
270
+ base_prompt
271
+ + indexed_docs_section
272
+ + """
273
+ **WHEN TO USE TOOLS VS DIRECT ANSWERS:**
274
+
275
+ Use Format 1 (answer) for:
276
+ - Greetings: {"answer": "Hello! How can I help?"}
277
+ - Thanks: {"answer": "You're welcome!"}
278
+ - **General knowledge questions**: {"answer": "Kalin is a name of Slavic origin meaning..."}
279
+ - **Conversation and chat**: {"answer": "That's interesting! Tell me more about..."}
280
+ - Out-of-scope: {"answer": "I don't have weather data..."}
281
+ - **FINAL ANSWERS after retrieving data**: {"answer": "According to the document, the vision is..."}
282
+
283
+ **IMPORTANT: If no documents are indexed, answer ALL questions using general knowledge!**
284
+
285
+ Use Format 2 (tool) ONLY when:
286
+ - User explicitly asks to search/index files OR documents are already indexed
287
+ - "what files are indexed?" → {"tool": "list_indexed_documents", "tool_args": {}}
288
+ - "search for X" → {"tool": "query_documents", "tool_args": {"query": "X"}}
289
+ - "what does doc say?" → {"tool": "query_specific_file", "tool_args": {...}}
290
+ - "find the oil and gas manual" → {"tool": "search_file", "tool_args": {"file_pattern": "oil and gas manual"}}
291
+ - "index my data folder" → {"tool": "search_directory", "tool_args": {"directory_name": "data"}}
292
+ - "index files in /path/to/dir" → {"tool": "index_directory", "tool_args": {"directory_path": "/path/to/dir"}}
293
+
294
+ **CRITICAL: NEVER make up or guess user data. Always use tools.**
295
+
296
+ **SMART DISCOVERY WORKFLOW:**
297
+
298
+ When user asks a domain-specific question (e.g., "what is the vision of the oil & gas regulator?"):
299
+ 1. Check if relevant documents are indexed
300
+ 2. If NO relevant documents found:
301
+ a. Extract key terms from question (e.g., "oil", "gas", "regulator")
302
+ b. Search for files using search_file with those terms
303
+ c. If files found, index them automatically
304
+ d. Provide status update: "Found and indexed X file(s)"
305
+ e. Then query to answer the question
306
+ 3. If documents already indexed, query directly
307
+
308
+ Example Smart Discovery:
309
+ User: "what is the vision of the oil & gas regulator?"
310
+ You: {"tool": "list_indexed_documents", "tool_args": {}}
311
+ Result: {"documents": [], "count": 0}
312
+ You: {"tool": "search_file", "tool_args": {"file_pattern": "oil gas"}}
313
+ Result: {"files": ["/docs/Oil-Gas-Manual.pdf"], "count": 1}
314
+ You: {"tool": "index_document", "tool_args": {"file_path": "/docs/Oil-Gas-Manual.pdf"}}
315
+ Result: {"status": "success", "chunks": 150}
316
+ You: {"thought": "Document indexed, now searching for vision", "tool": "query_specific_file", "tool_args": {"file_path": "/docs/Oil-Gas-Manual.pdf", "query": "vision of the oil gas regulator"}}
317
+ Result: {"chunks": ["The vision is to be recognized..."], "scores": [0.92]}
318
+ You: {"answer": "According to the Oil & Gas Manual, the vision is to be recognized..."}
319
+
320
+ **CONTEXT INFERENCE RULE:**
321
+
322
+ When user asks a question without specifying which document:
323
+ 1. Check the "CURRENTLY INDEXED DOCUMENTS" section above - you already know what's indexed!
324
+ 2. If EXACTLY 1 document indexed → **IMMEDIATELY search it**: {"tool": "query_documents", "tool_args": {"query": "..."}}
325
+ 3. If 0 documents → Use Smart Discovery workflow to find and index relevant files
326
+ 4. If multiple documents → Search all with query_documents OR ask which specific one: {"answer": "Which document? You have: [list]"}
327
+
328
+ **AVAILABLE TOOLS:**
329
+ The complete list of available tools with their descriptions is provided below in the AVAILABLE TOOLS section.
330
+ Tools are grouped by category: RAG tools, File System tools, Shell tools, etc.
331
+
332
+ **FILE SEARCH AND AUTO-INDEX WORKFLOW:**
333
+ When user asks "find the X manual" or "find X document on my drive":
334
+ 1. Use search_file (automatically searches all drives intelligently):
335
+ - Phase 1: Searches common locations (Documents, Downloads, Desktop) - FAST
336
+ - Phase 2: If not found, deep search entire drive(s) - THOROUGH
337
+ - Filters by document file types (.pdf, .docx, .txt, etc.)
338
+ 2. Handle results:
339
+ - **If 1 file found**: Automatically index it
340
+ - **If multiple files found**: Display numbered list, ask user to select
341
+ - **If none found**: Inform user
342
+ 3. After indexing, confirm and let user know they can ask questions
343
+
344
+ **IMPORTANT: Always show tool results with display_message!**
345
+ Tools like search_file return a 'display_message' field - ALWAYS show this to the user:
346
+
347
+ Example:
348
+ Tool result: {"display_message": "✓ Found 2 file(s) in current directory (gaia)", "file_list": [...]}
349
+ You must say: {"answer": "✓ Found 2 file(s) in current directory (gaia):\n1. Oil-Gas-Manual.pdf\n..."}
350
+
351
+ NOTE: Progress indicators (spinners) are shown automatically by the tool while searching.
352
+ You don't need to say "searching..." - the tool displays it live!
353
+
354
+ Example (Single file):
355
+ User: "Can you find the oil and gas manual on my drive?"
356
+ You: {"tool": "search_file", "tool_args": {"file_pattern": "oil gas"}}
357
+ Result: {"files": [...], "count": 1, "display_message": "🔍 Found 1 matching file(s)", "file_list": [{"number": 1, "name": "Oil-Gas-Manual.pdf", "directory": "C:/Users/user/Documents"}]}
358
+ You: {"answer": "🔍 Searching for 'oil gas'... Found 1 file:\n• Oil-Gas-Manual.pdf (Documents folder)\n\nIndexing now..."}
359
+ You: {"tool": "index_document", "tool_args": {"file_path": "C:/Users/user/Documents/Oil-Gas-Manual.pdf"}}
360
+ You: {"answer": "✓ Indexed Oil-Gas-Manual.pdf (150 chunks). You can now ask me questions about it!"}
361
+
362
+ Example (Multiple files):
363
+ User: "Find the manual on my drive"
364
+ You: {"answer": "🔍 Searching your drive for 'manual'..."}
365
+ You: {"tool": "search_file", "tool_args": {"file_pattern": "manual"}}
366
+ Result: {"count": 3, "file_list": [{"number": 1, "name": "Oil-Gas-Manual.pdf", "directory": "C:/Docs"}, {"number": 2, "name": "Safety-Manual.pdf", "directory": "C:/Downloads"}]}
367
+ You: {"answer": "Found 3 matching files:\n\n1. Oil-Gas-Manual.pdf (C:/Docs/)\n2. Safety-Manual.pdf (C:/Downloads/)\n3. Training-Manual.pdf (C:/Work/)\n\nWhich one would you like me to index? (enter the number)"}
368
+ User: "1"
369
+ You: {"tool": "index_document", "tool_args": {"file_path": "C:/Docs/Oil-Gas-Manual.pdf"}}
370
+ You: {"answer": "✓ Indexed Oil-Gas-Manual.pdf. You can now ask questions about it!"}
371
+
372
+ **DIRECTORY INDEXING WORKFLOW:**
373
+ When user asks to "index my data folder" or similar:
374
+ 1. Use search_directory to find matching directories
375
+ 2. Show user the matches and ask which one (if multiple)
376
+ 3. Use index_directory on the chosen path
377
+ 4. Report indexing results"""
378
+ )
379
+
380
+ return prompt
382
+
383
def _create_console(self):
    """Return the console used by the chat agent.

    In silent mode a ``SilentConsole`` is used, but with the final answer
    left visible: a chat user always needs to see the response. Otherwise a
    regular ``AgentConsole`` is returned.
    """
    from gaia.agents.base.console import SilentConsole

    if not self.silent_mode:
        return AgentConsole()

    # Silent mode hides intermediate output only, never the final answer.
    return SilentConsole(silence_final_answer=False)
392
+
393
+ def _generate_search_keys(self, query: str) -> List[str]:
394
+ """
395
+ Generate search keys from query for better retrieval.
396
+ Extracts keywords and reformulates query for improved matching.
397
+
398
+ Args:
399
+ query: User query
400
+
401
+ Returns:
402
+ List of search keys/queries
403
+ """
404
+ keys = [query] # Always include original query
405
+
406
+ # Extract potential keywords (simple approach)
407
+ # Remove common words and extract meaningful terms
408
+ stop_words = {
409
+ "what",
410
+ "how",
411
+ "when",
412
+ "where",
413
+ "who",
414
+ "why",
415
+ "is",
416
+ "are",
417
+ "was",
418
+ "were",
419
+ "the",
420
+ "a",
421
+ "an",
422
+ "and",
423
+ "or",
424
+ "but",
425
+ "in",
426
+ "on",
427
+ "at",
428
+ "to",
429
+ "for",
430
+ "of",
431
+ "with",
432
+ "by",
433
+ "from",
434
+ "about",
435
+ "can",
436
+ "could",
437
+ "would",
438
+ "should",
439
+ "do",
440
+ "does",
441
+ "did",
442
+ "tell",
443
+ "me",
444
+ "you",
445
+ }
446
+
447
+ words = query.lower().split()
448
+ keywords = [
449
+ w.strip("?,.:;!")
450
+ for w in words
451
+ if w.lower() not in stop_words and len(w) > 2
452
+ ]
453
+
454
+ # Add keyword-based query (only if different from original)
455
+ if keywords:
456
+ keyword_query = " ".join(keywords)
457
+ if keyword_query != query: # Avoid duplicates
458
+ keys.append(keyword_query)
459
+
460
+ # Add question reformulations for common patterns
461
+ if query.lower().startswith("what is"):
462
+ topic = query[8:].strip("?").strip()
463
+ keys.append(f"{topic} definition")
464
+ keys.append(f"{topic} explanation")
465
+ elif query.lower().startswith("how to"):
466
+ topic = query[7:].strip("?").strip()
467
+ keys.append(f"{topic} steps")
468
+ keys.append(f"{topic} guide")
469
+
470
+ logger.debug(f"Generated search keys: {keys}")
471
+ return keys
472
+
473
+ def _is_path_allowed(self, path: str) -> bool:
474
+ """
475
+ Check if a path is within allowed directories.
476
+ Uses real path resolution to prevent TOCTOU attacks.
477
+
478
+ Args:
479
+ path: Path to validate
480
+
481
+ Returns:
482
+ True if path is allowed, False otherwise
483
+ """
484
+ try:
485
+ # Resolve path using os.path.realpath to follow symlinks
486
+ # This prevents TOCTOU attacks by resolving at check time
487
+ real_path = Path(os.path.realpath(path)).resolve()
488
+
489
+ # Check if real path is within any allowed directory
490
+ for allowed_path in self.allowed_paths:
491
+ try:
492
+ # is_relative_to requires Python 3.9+, use alternative for compatibility
493
+ real_path.relative_to(allowed_path)
494
+ return True
495
+ except ValueError:
496
+ continue
497
+
498
+ return False
499
+ except Exception as e:
500
+ logger.error(f"Error validating path {path}: {e}")
501
+ return False
502
+
503
+ def _validate_and_open_file(self, file_path: str, mode: str = "r"):
504
+ """
505
+ Safely open a file with path validation using O_NOFOLLOW to prevent TOCTOU attacks.
506
+
507
+ This method prevents Time-of-Check-Time-of-Use vulnerabilities by:
508
+ 1. Using O_NOFOLLOW flag to reject symlinks
509
+ 2. Opening file with low-level os.open() before validation
510
+ 3. Validating the opened file descriptor, not the path
511
+
512
+ Args:
513
+ file_path: Path to the file
514
+ mode: File open mode ('r', 'w', 'rb', 'wb', etc.)
515
+
516
+ Returns:
517
+ File handle if successful
518
+
519
+ Raises:
520
+ PermissionError: If path is not allowed or is a symlink
521
+ IOError: If file cannot be opened
522
+ """
523
+ import stat
524
+
525
+ try:
526
+ # Determine open flags based on mode
527
+ if "r" in mode and "+" not in mode:
528
+ flags = os.O_RDONLY
529
+ elif "w" in mode:
530
+ flags = os.O_WRONLY | os.O_CREAT | os.O_TRUNC
531
+ elif "a" in mode:
532
+ flags = os.O_WRONLY | os.O_CREAT | os.O_APPEND
533
+ elif "+" in mode:
534
+ flags = os.O_RDWR
535
+ else:
536
+ flags = os.O_RDONLY
537
+
538
+ # CRITICAL: Add O_NOFOLLOW to reject symlinks
539
+ # This prevents TOCTOU attacks where symlinks are swapped
540
+ if hasattr(os, "O_NOFOLLOW"):
541
+ flags |= os.O_NOFOLLOW
542
+
543
+ # Open the file at low level (doesn't follow symlinks with O_NOFOLLOW)
544
+ try:
545
+ fd = os.open(file_path, flags)
546
+ except OSError as e:
547
+ if e.errno == 40: # ELOOP - too many symbolic links
548
+ raise PermissionError(f"Symlinks not allowed: {file_path}")
549
+ raise IOError(f"Cannot open file {file_path}: {e}")
550
+
551
+ # Get the real path of the opened file descriptor
552
+ # On Linux, we can use /proc/self/fd/
553
+ # On other systems, use fstat
554
+ try:
555
+ file_stat = os.fstat(fd)
556
+
557
+ # Verify it's a regular file, not a directory or special file
558
+ if not stat.S_ISREG(file_stat.st_mode):
559
+ os.close(fd)
560
+ raise PermissionError(f"Not a regular file: {file_path}")
561
+
562
+ # Get the real path (Linux-specific, but works on most Unix)
563
+ if os.path.exists(f"/proc/self/fd/{fd}"):
564
+ real_path = Path(os.readlink(f"/proc/self/fd/{fd}")).resolve()
565
+ else:
566
+ # Fallback for non-Linux systems
567
+ real_path = Path(file_path).resolve()
568
+
569
+ # Validate the real path is within allowed directories
570
+ path_allowed = False
571
+ for allowed_path in self.allowed_paths:
572
+ try:
573
+ real_path.relative_to(allowed_path)
574
+ path_allowed = True
575
+ break
576
+ except ValueError:
577
+ continue
578
+
579
+ if not path_allowed:
580
+ os.close(fd)
581
+ raise PermissionError(
582
+ f"Access denied to path: {real_path}\n"
583
+ f"Requested: {file_path}\n"
584
+ f"Resolved to path outside allowed directories"
585
+ )
586
+
587
+ # Convert file descriptor to Python file object
588
+ if "b" in mode:
589
+ return os.fdopen(fd, mode)
590
+ else:
591
+ return os.fdopen(fd, mode, encoding="utf-8")
592
+
593
+ except Exception:
594
+ os.close(fd)
595
+ raise
596
+
597
+ except PermissionError:
598
+ raise
599
+ except Exception as e:
600
+ raise IOError(f"Failed to securely open file {file_path}: {e}")
601
+
602
+ def _auto_save_session(self) -> None:
603
+ """Auto-save current session (called after important operations)."""
604
+ try:
605
+ if self.current_session:
606
+ self.save_current_session()
607
+ if self.debug:
608
+ logger.debug(
609
+ f"Auto-saved session: {self.current_session.session_id}"
610
+ )
611
+ except Exception as e:
612
+ logger.warning(f"Auto-save failed: {e}")
613
+
614
+ def _register_tools(self) -> None:
615
+ """Register chat agent tools from mixins."""
616
+ # Register tools from mixins
617
+ self.register_rag_tools()
618
+ self.register_file_tools()
619
+ self.register_shell_tools()
620
+ self.register_file_search_tools() # Shared file search tools
621
+
622
+ # NOTE: The actual tool definitions are in the mixin classes:
623
+ # - RAGToolsMixin (rag_tools.py): RAG and document indexing tools
624
+ # - FileToolsMixin (file_tools.py): Directory monitoring
625
+ # - ShellToolsMixin (shell_tools.py): Shell command execution
626
+ # - FileSearchToolsMixin (shared): File and directory search across drives
627
+
628
+ def _index_documents(self, documents: List[str]) -> None:
629
+ """Index initial documents."""
630
+ for doc in documents:
631
+ try:
632
+ if os.path.exists(doc):
633
+ logger.info(f"Indexing document: {doc}")
634
+ result = self.rag.index_document(doc)
635
+
636
+ if result.get("success"):
637
+ self.indexed_files.add(doc)
638
+ logger.info(
639
+ f"Successfully indexed: {doc} ({result.get('num_chunks', 0)} chunks)"
640
+ )
641
+ else:
642
+ error = result.get("error", "Unknown error")
643
+ logger.error(f"Failed to index {doc}: {error}")
644
+ else:
645
+ logger.warning(f"Document not found: {doc}")
646
+ except Exception as e:
647
+ logger.error(f"Failed to index {doc}: {e}")
648
+
649
+ # Update system prompt after indexing to include the new documents
650
+ self._update_system_prompt()
651
+
652
+ def _update_system_prompt(self) -> None:
653
+ """Update the system prompt with current indexed documents."""
654
+ # Regenerate the system prompt with updated document list
655
+ self.system_prompt = self._get_system_prompt()
656
+
657
+ # Add the tools description using the parent class method
658
+ tools_description = self._format_tools_for_prompt()
659
+ self.system_prompt += f"\n\n==== AVAILABLE TOOLS ====\n{tools_description}\n\n"
660
+
661
+ if self.rag:
662
+ logger.debug(
663
+ f"Updated system prompt with {len(self.rag.indexed_files)} indexed documents"
664
+ )
665
+
666
+ def _start_watching(self) -> None:
667
+ """Start watching directories for changes."""
668
+ for directory in self.watch_directories:
669
+ self._watch_directory(directory)
670
+
671
def _watch_directory(self, directory: str) -> None:
    """Begin recursive monitoring of ``directory`` for file events.

    Args:
        directory: Directory to observe.

    Raises:
        ImportError: If ``check_watchdog_available()`` returns a falsy
            value. The same message is logged as an error first.

    Any exception raised while building, scheduling or starting the
    observer is logged and swallowed; in that case no observer is added to
    ``self.observers``.
    """
    watchdog_ready = check_watchdog_available()
    if not watchdog_ready:
        error_msg = (
            "\n❌ Error: Missing required package 'watchdog'\n\n"
            "File watching requires the watchdog package.\n"
            "Please install the required dependencies:\n"
            ' uv pip install -e ".[dev]"\n\n'
            "Or install watchdog directly:\n"
            ' uv pip install "watchdog>=2.1.0"\n'
        )
        logger.error(error_msg)
        raise ImportError(error_msg)

    try:
        # Created and modified files share the reindex callback; deletions
        # and moves have dedicated handlers.
        callbacks = dict(
            on_created=self.reindex_file,
            on_modified=self.reindex_file,
            on_deleted=self._handle_file_deletion,
            on_moved=self._handle_file_move,
        )
        handler = FileChangeHandler(**callbacks)

        watcher = Observer()
        watcher.schedule(handler, directory, recursive=True)
        watcher.start()

        # Track the observer only once it is running.
        self.observers.append(watcher)
        logger.info(f"Started watching: {directory}")
    except Exception as e:
        logger.error(f"Failed to watch {directory}: {e}")
700
+
701
+ def _handle_file_deletion(self, file_path: str) -> None:
702
+ """Handle file deletion by removing it from the index."""
703
+ if not self.rag:
704
+ return
705
+
706
+ try:
707
+ file_abs_path = str(Path(file_path).absolute())
708
+ if file_abs_path in self.indexed_files:
709
+ logger.info(f"File deleted, removing from index: {file_path}")
710
+ if self.rag.remove_document(file_abs_path):
711
+ self.indexed_files.discard(file_abs_path)
712
+ logger.info(
713
+ f"Successfully removed deleted file from index: {file_path}"
714
+ )
715
+ else:
716
+ logger.warning(
717
+ f"Failed to remove deleted file from index: {file_path}"
718
+ )
719
+ except Exception as e:
720
+ logger.error(f"Error handling file deletion {file_path}: {e}")
721
+
722
+ def _handle_file_move(self, src_path: str, dest_path: str) -> None:
723
+ """Handle file move by removing old path and indexing new path."""
724
+ self._handle_file_deletion(src_path)
725
+ self.reindex_file(dest_path)
726
+
727
def reindex_file(self, file_path: str) -> None:
    """Refresh the index entry for a created or modified file.

    Args:
        file_path: Path of the file to reindex.

    Logs a warning and returns when ``self.rag`` is falsy. Otherwise
    ``self.rag.reindex_document`` is called; when its result carries a
    truthy ``"success"`` entry the path is added to ``self.indexed_files``,
    and otherwise the reported error is logged. Exceptions are logged,
    never raised.
    """
    if not self.rag:
        logger.warning(
            f"Cannot reindex {file_path}: RAG dependencies not installed"
        )
        return

    try:
        logger.info(f"Reindexing: {file_path}")
        # reindex_document is used rather than index_document; per the
        # original comment it removes old chunks first.
        outcome = self.rag.reindex_document(file_path)

        if not outcome.get("success"):
            error = outcome.get("error", "Unknown error")
            logger.error(f"Failed to reindex {file_path}: {error}")
            return

        self.indexed_files.add(file_path)
        logger.info(f"Successfully reindexed {file_path}")
    except Exception as e:
        logger.error(f"Failed to reindex {file_path}: {e}")
747
+
748
def stop_watching(self) -> None:
    """Stop all file system observers and forget them.

    Every observer in ``self.observers`` is first asked to stop, and only
    then are they joined. Signalling them all up front lets them wind down
    concurrently, instead of waiting for each one to finish before the next
    is even told to stop. The list is cleared once all have been joined.
    Exceptions from ``stop()`` or ``join()`` propagate to the caller.
    """
    # Phase 1: request shutdown everywhere.
    for observer in self.observers:
        observer.stop()

    # Phase 2: wait for each observer to finish.
    for observer in self.observers:
        observer.join()

    self.observers.clear()
754
+
755
def load_session(self, session_id: str) -> bool:
    """
    Load a saved session and restore its state onto this agent.

    The session becomes ``self.current_session``. Its indexed documents are
    re-indexed (when RAG is available), its watched directories are watched
    again, and its chat history replaces ``self.conversation_history``.

    Args:
        session_id: Session ID to load

    Returns:
        True if successful, False if the session was not found or any
        unexpected error occurred (the error is logged, not raised)
    """
    try:
        session = self.session_manager.load_session(session_id)
        if not session:
            logger.error(f"Session not found: {session_id}")
            return False

        self.current_session = session

        # Restore indexed documents (only if RAG is available)
        if self.rag:
            for doc_path in session.indexed_documents:
                # Documents that no longer exist on disk are skipped.
                if not os.path.exists(doc_path):
                    continue
                try:
                    result = self.rag.index_document(doc_path)
                    # Only record the document when indexing reports
                    # success, as the other indexing methods in this class
                    # do; otherwise indexed_files would list documents the
                    # index does not hold.
                    if result.get("success"):
                        self.indexed_files.add(doc_path)
                    else:
                        error = result.get("error", "Unknown error")
                        logger.warning(f"Failed to reindex {doc_path}: {error}")
                except Exception as e:
                    logger.warning(f"Failed to reindex {doc_path}: {e}")
        elif session.indexed_documents:
            logger.warning(
                f"Cannot restore {len(session.indexed_documents)} indexed documents: "
                "RAG dependencies not installed"
            )

        # Restore watched directories (existing ones not already watched)
        for dir_path in session.watched_directories:
            if os.path.exists(dir_path) and dir_path not in self.watch_directories:
                self.watch_directories.append(dir_path)
                self._watch_directory(dir_path)

        # Restore conversation history (copied, so the session's own list
        # is not shared with the agent)
        self.conversation_history = list(session.chat_history)

        logger.info(
            f"Loaded session {session_id}: {len(session.indexed_documents)} docs, {len(session.chat_history)} messages"
        )
        return True

    except Exception as e:
        logger.error(f"Error loading session: {e}")
        return False
805
+
806
def save_current_session(self) -> bool:
    """
    Snapshot the agent's state into the current session and persist it.

    A new session is created through ``self.session_manager`` when none is
    active. The session then receives list copies of the indexed files,
    watched directories and conversation history before being saved.

    Returns:
        The result of ``session_manager.save_session``, or False if an
        exception occurred (the error is logged, not raised)
    """
    try:
        session = self.current_session
        if not session:
            # No active session yet: start a new one.
            session = self.session_manager.create_session()
            self.current_session = session

        # Copy the live collections so later changes on the agent do not
        # alter what was captured.
        session.indexed_documents = list(self.indexed_files)
        session.watched_directories = list(self.watch_directories)
        session.chat_history = list(self.conversation_history)

        return self.session_manager.save_session(session)

    except Exception as e:
        logger.error(f"Error saving session: {e}")
        return False
829
+
830
+ def __del__(self):
831
+ """Cleanup when agent is destroyed."""
832
+ try:
833
+ self.stop_watching()
834
+ except Exception as e:
835
+ logger.error(f"Error stopping file watchers during cleanup: {e}")