amd-gaia 0.14.3__py3-none-any.whl → 0.15.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (181) hide show
  1. {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/METADATA +223 -223
  2. amd_gaia-0.15.1.dist-info/RECORD +178 -0
  3. {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/entry_points.txt +1 -0
  4. {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/licenses/LICENSE.md +20 -20
  5. gaia/__init__.py +29 -29
  6. gaia/agents/__init__.py +19 -19
  7. gaia/agents/base/__init__.py +9 -9
  8. gaia/agents/base/agent.py +2177 -2177
  9. gaia/agents/base/api_agent.py +120 -120
  10. gaia/agents/base/console.py +1841 -1841
  11. gaia/agents/base/errors.py +237 -237
  12. gaia/agents/base/mcp_agent.py +86 -86
  13. gaia/agents/base/tools.py +83 -83
  14. gaia/agents/blender/agent.py +556 -556
  15. gaia/agents/blender/agent_simple.py +133 -135
  16. gaia/agents/blender/app.py +211 -211
  17. gaia/agents/blender/app_simple.py +41 -41
  18. gaia/agents/blender/core/__init__.py +16 -16
  19. gaia/agents/blender/core/materials.py +506 -506
  20. gaia/agents/blender/core/objects.py +316 -316
  21. gaia/agents/blender/core/rendering.py +225 -225
  22. gaia/agents/blender/core/scene.py +220 -220
  23. gaia/agents/blender/core/view.py +146 -146
  24. gaia/agents/chat/__init__.py +9 -9
  25. gaia/agents/chat/agent.py +835 -835
  26. gaia/agents/chat/app.py +1058 -1058
  27. gaia/agents/chat/session.py +508 -508
  28. gaia/agents/chat/tools/__init__.py +15 -15
  29. gaia/agents/chat/tools/file_tools.py +96 -96
  30. gaia/agents/chat/tools/rag_tools.py +1729 -1729
  31. gaia/agents/chat/tools/shell_tools.py +436 -436
  32. gaia/agents/code/__init__.py +7 -7
  33. gaia/agents/code/agent.py +549 -549
  34. gaia/agents/code/cli.py +377 -0
  35. gaia/agents/code/models.py +135 -135
  36. gaia/agents/code/orchestration/__init__.py +24 -24
  37. gaia/agents/code/orchestration/checklist_executor.py +1763 -1763
  38. gaia/agents/code/orchestration/checklist_generator.py +713 -713
  39. gaia/agents/code/orchestration/factories/__init__.py +9 -9
  40. gaia/agents/code/orchestration/factories/base.py +63 -63
  41. gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -118
  42. gaia/agents/code/orchestration/factories/python_factory.py +106 -106
  43. gaia/agents/code/orchestration/orchestrator.py +841 -841
  44. gaia/agents/code/orchestration/project_analyzer.py +391 -391
  45. gaia/agents/code/orchestration/steps/__init__.py +67 -67
  46. gaia/agents/code/orchestration/steps/base.py +188 -188
  47. gaia/agents/code/orchestration/steps/error_handler.py +314 -314
  48. gaia/agents/code/orchestration/steps/nextjs.py +828 -828
  49. gaia/agents/code/orchestration/steps/python.py +307 -307
  50. gaia/agents/code/orchestration/template_catalog.py +469 -469
  51. gaia/agents/code/orchestration/workflows/__init__.py +14 -14
  52. gaia/agents/code/orchestration/workflows/base.py +80 -80
  53. gaia/agents/code/orchestration/workflows/nextjs.py +186 -186
  54. gaia/agents/code/orchestration/workflows/python.py +94 -94
  55. gaia/agents/code/prompts/__init__.py +11 -11
  56. gaia/agents/code/prompts/base_prompt.py +77 -77
  57. gaia/agents/code/prompts/code_patterns.py +2036 -2036
  58. gaia/agents/code/prompts/nextjs_prompt.py +40 -40
  59. gaia/agents/code/prompts/python_prompt.py +109 -109
  60. gaia/agents/code/schema_inference.py +365 -365
  61. gaia/agents/code/system_prompt.py +41 -41
  62. gaia/agents/code/tools/__init__.py +42 -42
  63. gaia/agents/code/tools/cli_tools.py +1138 -1138
  64. gaia/agents/code/tools/code_formatting.py +319 -319
  65. gaia/agents/code/tools/code_tools.py +769 -769
  66. gaia/agents/code/tools/error_fixing.py +1347 -1347
  67. gaia/agents/code/tools/external_tools.py +180 -180
  68. gaia/agents/code/tools/file_io.py +845 -845
  69. gaia/agents/code/tools/prisma_tools.py +190 -190
  70. gaia/agents/code/tools/project_management.py +1016 -1016
  71. gaia/agents/code/tools/testing.py +321 -321
  72. gaia/agents/code/tools/typescript_tools.py +122 -122
  73. gaia/agents/code/tools/validation_parsing.py +461 -461
  74. gaia/agents/code/tools/validation_tools.py +806 -806
  75. gaia/agents/code/tools/web_dev_tools.py +1758 -1758
  76. gaia/agents/code/validators/__init__.py +16 -16
  77. gaia/agents/code/validators/antipattern_checker.py +241 -241
  78. gaia/agents/code/validators/ast_analyzer.py +197 -197
  79. gaia/agents/code/validators/requirements_validator.py +145 -145
  80. gaia/agents/code/validators/syntax_validator.py +171 -171
  81. gaia/agents/docker/__init__.py +7 -7
  82. gaia/agents/docker/agent.py +642 -642
  83. gaia/agents/emr/__init__.py +8 -8
  84. gaia/agents/emr/agent.py +1506 -1506
  85. gaia/agents/emr/cli.py +1322 -1322
  86. gaia/agents/emr/constants.py +475 -475
  87. gaia/agents/emr/dashboard/__init__.py +4 -4
  88. gaia/agents/emr/dashboard/server.py +1974 -1974
  89. gaia/agents/jira/__init__.py +11 -11
  90. gaia/agents/jira/agent.py +894 -894
  91. gaia/agents/jira/jql_templates.py +299 -299
  92. gaia/agents/routing/__init__.py +7 -7
  93. gaia/agents/routing/agent.py +567 -570
  94. gaia/agents/routing/system_prompt.py +75 -75
  95. gaia/agents/summarize/__init__.py +11 -0
  96. gaia/agents/summarize/agent.py +885 -0
  97. gaia/agents/summarize/prompts.py +129 -0
  98. gaia/api/__init__.py +23 -23
  99. gaia/api/agent_registry.py +238 -238
  100. gaia/api/app.py +305 -305
  101. gaia/api/openai_server.py +575 -575
  102. gaia/api/schemas.py +186 -186
  103. gaia/api/sse_handler.py +373 -373
  104. gaia/apps/__init__.py +4 -4
  105. gaia/apps/llm/__init__.py +6 -6
  106. gaia/apps/llm/app.py +173 -169
  107. gaia/apps/summarize/app.py +116 -633
  108. gaia/apps/summarize/html_viewer.py +133 -133
  109. gaia/apps/summarize/pdf_formatter.py +284 -284
  110. gaia/audio/__init__.py +2 -2
  111. gaia/audio/audio_client.py +439 -439
  112. gaia/audio/audio_recorder.py +269 -269
  113. gaia/audio/kokoro_tts.py +599 -599
  114. gaia/audio/whisper_asr.py +432 -432
  115. gaia/chat/__init__.py +16 -16
  116. gaia/chat/app.py +430 -430
  117. gaia/chat/prompts.py +522 -522
  118. gaia/chat/sdk.py +1228 -1225
  119. gaia/cli.py +5481 -5621
  120. gaia/database/__init__.py +10 -10
  121. gaia/database/agent.py +176 -176
  122. gaia/database/mixin.py +290 -290
  123. gaia/database/testing.py +64 -64
  124. gaia/eval/batch_experiment.py +2332 -2332
  125. gaia/eval/claude.py +542 -542
  126. gaia/eval/config.py +37 -37
  127. gaia/eval/email_generator.py +512 -512
  128. gaia/eval/eval.py +3179 -3179
  129. gaia/eval/groundtruth.py +1130 -1130
  130. gaia/eval/transcript_generator.py +582 -582
  131. gaia/eval/webapp/README.md +167 -167
  132. gaia/eval/webapp/package-lock.json +875 -875
  133. gaia/eval/webapp/package.json +20 -20
  134. gaia/eval/webapp/public/app.js +3402 -3402
  135. gaia/eval/webapp/public/index.html +87 -87
  136. gaia/eval/webapp/public/styles.css +3661 -3661
  137. gaia/eval/webapp/server.js +415 -415
  138. gaia/eval/webapp/test-setup.js +72 -72
  139. gaia/llm/__init__.py +9 -2
  140. gaia/llm/base_client.py +60 -0
  141. gaia/llm/exceptions.py +12 -0
  142. gaia/llm/factory.py +70 -0
  143. gaia/llm/lemonade_client.py +3236 -3221
  144. gaia/llm/lemonade_manager.py +294 -294
  145. gaia/llm/providers/__init__.py +9 -0
  146. gaia/llm/providers/claude.py +108 -0
  147. gaia/llm/providers/lemonade.py +120 -0
  148. gaia/llm/providers/openai_provider.py +79 -0
  149. gaia/llm/vlm_client.py +382 -382
  150. gaia/logger.py +189 -189
  151. gaia/mcp/agent_mcp_server.py +245 -245
  152. gaia/mcp/blender_mcp_client.py +138 -138
  153. gaia/mcp/blender_mcp_server.py +648 -648
  154. gaia/mcp/context7_cache.py +332 -332
  155. gaia/mcp/external_services.py +518 -518
  156. gaia/mcp/mcp_bridge.py +811 -550
  157. gaia/mcp/servers/__init__.py +6 -6
  158. gaia/mcp/servers/docker_mcp.py +83 -83
  159. gaia/perf_analysis.py +361 -0
  160. gaia/rag/__init__.py +10 -10
  161. gaia/rag/app.py +293 -293
  162. gaia/rag/demo.py +304 -304
  163. gaia/rag/pdf_utils.py +235 -235
  164. gaia/rag/sdk.py +2194 -2194
  165. gaia/security.py +163 -163
  166. gaia/talk/app.py +289 -289
  167. gaia/talk/sdk.py +538 -538
  168. gaia/testing/__init__.py +87 -87
  169. gaia/testing/assertions.py +330 -330
  170. gaia/testing/fixtures.py +333 -333
  171. gaia/testing/mocks.py +493 -493
  172. gaia/util.py +46 -46
  173. gaia/utils/__init__.py +33 -33
  174. gaia/utils/file_watcher.py +675 -675
  175. gaia/utils/parsing.py +223 -223
  176. gaia/version.py +100 -100
  177. amd_gaia-0.14.3.dist-info/RECORD +0 -168
  178. gaia/agents/code/app.py +0 -266
  179. gaia/llm/llm_client.py +0 -729
  180. {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/WHEEL +0 -0
  181. {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/top_level.txt +0 -0
@@ -1,675 +1,675 @@
1
- # Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
2
- # SPDX-License-Identifier: MIT
3
-
4
- """
5
- Generic file watching utilities for GAIA agents.
6
-
7
- Provides FileChangeHandler and FileWatcher for monitoring directories
8
- and responding to file system events with callbacks.
9
-
10
- Also provides file hashing utilities for duplicate detection.
11
-
12
- Example:
13
- from gaia.utils import FileChangeHandler, FileWatcher, compute_file_hash
14
-
15
- def on_new_file(path: str):
16
- print(f"New file: {path}")
17
- file_hash = compute_file_hash(path)
18
- print(f"Hash: {file_hash}")
19
-
20
- watcher = FileWatcher(
21
- directory="./data",
22
- on_created=on_new_file,
23
- extensions=[".pdf", ".txt"],
24
- )
25
- watcher.start()
26
- """
27
-
28
- import hashlib
29
- import logging
30
- import time
31
- from pathlib import Path
32
- from typing import Any, Callable, Dict, List, Optional, Set, Union
33
-
34
- try:
35
- from watchdog.events import FileSystemEvent, FileSystemEventHandler
36
- from watchdog.observers import Observer
37
-
38
- WATCHDOG_AVAILABLE = True
39
- except ImportError:
40
- # Create dummy base class when watchdog is not available
41
- class FileSystemEventHandler:
42
- """Dummy base class when watchdog is not installed."""
43
-
44
- class FileSystemEvent:
45
- """Dummy event class when watchdog is not installed."""
46
-
47
- src_path: str = ""
48
- dest_path: str = ""
49
- is_directory: bool = False
50
-
51
- Observer = None
52
- WATCHDOG_AVAILABLE = False
53
-
54
- logger = logging.getLogger(__name__)
55
-
56
- # Type alias for event callbacks
57
- EventCallback = Callable[[str], None]
58
- MoveCallback = Callable[[str, str], None] # (src_path, dest_path)
59
- FilterCallback = Callable[[str], bool]
60
-
61
- # Default chunk size for file hashing (64KB)
62
- HASH_CHUNK_SIZE = 65536
63
-
64
-
65
def compute_file_hash(
    path: Union[str, Path],
    algorithm: str = "sha256",
    chunk_size: int = HASH_CHUNK_SIZE,
) -> Optional[str]:
    """
    Compute a hash of a file's contents.

    Uses chunked reading to handle large files efficiently without
    loading the entire file into memory.

    Args:
        path: Path to the file to hash.
        algorithm: Hash algorithm to use (default: sha256).
            Supports any algorithm name accepted by hashlib.new().
        chunk_size: Size of chunks to read at a time (default: 64KB).
            None, zero or negative values fall back to HASH_CHUNK_SIZE.

    Returns:
        Hex-encoded hash string, or None if the path is not a regular file,
        the file cannot be read, or the algorithm is not supported.

    Example:
        from gaia.utils import compute_file_hash

        # Check if file was already processed
        file_hash = compute_file_hash("intake_form.pdf")
        if file_hash in processed_hashes:
            print("Already processed")
        else:
            process_file("intake_form.pdf")
            processed_hashes.add(file_hash)
    """
    # f.read(0) returns b"" immediately, which would end the loop before any
    # data is hashed and yield the digest of empty input for every file.
    # The digest does not depend on chunking, so using the default size for
    # non-positive values is safe.
    if chunk_size is None or chunk_size <= 0:
        chunk_size = HASH_CHUNK_SIZE

    try:
        file_path = Path(path)
        # is_file() is already False for a missing path.
        if not file_path.is_file():
            return None

        hasher = hashlib.new(algorithm)
        with open(file_path, "rb") as f:
            while chunk := f.read(chunk_size):
                hasher.update(chunk)
        return hasher.hexdigest()
    except (OSError, ValueError) as e:
        # OSError: unreadable file; ValueError: unknown hash algorithm.
        logger.warning(f"Could not compute hash for {path}: {e}")
        return None
109
-
110
-
111
def compute_bytes_hash(
    data: bytes,
    algorithm: str = "sha256",
) -> str:
    """
    Return the hex digest of an in-memory bytes payload.

    Handy when the content has already been read and re-opening the
    file just to hash it would be wasteful.

    Args:
        data: Bytes to hash.
        algorithm: Hash algorithm name passed to hashlib.new()
            (default: sha256).

    Returns:
        Hex-encoded hash string.

    Example:
        from gaia.utils import compute_bytes_hash

        with open("file.pdf", "rb") as f:
            content = f.read()
        file_hash = compute_bytes_hash(content)
    """
    # hashlib.new() accepts the initial data directly.
    return hashlib.new(algorithm, data).hexdigest()
137
-
138
-
139
class FileChangeHandler(FileSystemEventHandler):
    """
    Generic handler for file system events.

    A flexible, callback-based file system event handler that can be used
    with any agent or application. Supports:
    - Callbacks for created, modified, deleted, and moved events
    - File extension filtering
    - Custom filter predicates
    - Debouncing to prevent duplicate events
    - Telemetry tracking

    Exceptions raised by user callbacks are logged and swallowed so that a
    failing callback does not propagate to the caller of the event method.

    Example:
        from gaia.utils import FileChangeHandler
        from watchdog.observers import Observer

        def handle_new_file(path: str):
            print(f"Processing: {path}")

        handler = FileChangeHandler(
            on_created=handle_new_file,
            extensions=[".pdf", ".png", ".jpg"],
            debounce_seconds=2.0,
        )

        observer = Observer()
        observer.schedule(handler, "./intake_forms", recursive=False)
        observer.start()
    """

    # Default extensions for document processing
    DEFAULT_EXTENSIONS: List[str] = [
        ".pdf",
        ".txt",
        ".md",
        ".markdown",
        ".csv",
        ".json",
        ".py",
        ".js",
        ".ts",
        ".java",
        ".cpp",
        ".c",
        ".html",
        ".css",
        ".yaml",
        ".yml",
        ".xml",
        ".rst",
        ".log",
    ]

    def __init__(
        self,
        on_created: Optional[EventCallback] = None,
        on_modified: Optional[EventCallback] = None,
        on_deleted: Optional[EventCallback] = None,
        on_moved: Optional[MoveCallback] = None,
        extensions: Optional[List[str]] = None,
        filter_func: Optional[FilterCallback] = None,
        debounce_seconds: float = 2.0,
        ignore_directories: bool = True,
    ):
        """
        Initialize FileChangeHandler.

        Args:
            on_created: Callback for file creation. Receives file path.
            on_modified: Callback for file modification. Receives file path.
            on_deleted: Callback for file deletion. Receives file path.
            on_moved: Callback for file move/rename. Receives (src_path, dest_path).
            extensions: List of file extensions to watch (e.g., [".pdf", ".txt"]).
                       If None, uses DEFAULT_EXTENSIONS.
                       If empty list [], watches all files.
            filter_func: Custom filter function. If provided, called with file path
                        and should return True to process the event.
                        Takes precedence over extensions filter.
            debounce_seconds: Minimum time between processing same file.
            ignore_directories: If True, ignores directory events.

        Example:
            # Watch only PDFs and images
            handler = FileChangeHandler(
                on_created=process_file,
                extensions=[".pdf", ".png", ".jpg"],
            )

            # Watch all files with custom filter
            handler = FileChangeHandler(
                on_created=process_file,
                extensions=[],  # Watch all
                # The callback receives the event path, so test the file
                # name rather than the whole path to exclude hidden files.
                filter_func=lambda p: not Path(p).name.startswith("."),
            )
        """
        super().__init__()
        self._on_created = on_created
        self._on_modified = on_modified
        self._on_deleted = on_deleted
        self._on_moved = on_moved

        # Set up extensions filter
        if extensions is None:
            self._extensions: Set[str] = set(self.DEFAULT_EXTENSIONS)
        else:
            # Normalize extensions to lowercase with leading dot
            self._extensions = {
                ext.lower() if ext.startswith(".") else f".{ext.lower()}"
                for ext in extensions
            }

        self._filter_func = filter_func
        self._debounce_seconds = debounce_seconds
        self._ignore_directories = ignore_directories

        # Debounce tracking: path -> timestamp of the last accepted event
        self._last_processed: Dict[str, float] = {}
        self._max_cache_size = 1000

        # Telemetry
        self._telemetry: Dict[str, Any] = self._new_telemetry()

    @staticmethod
    def _new_telemetry() -> Dict[str, Any]:
        """Build a zeroed telemetry dict (shared by __init__ and reset)."""
        return {
            "files_created": 0,
            "files_modified": 0,
            "files_deleted": 0,
            "files_moved": 0,
            "total_events": 0,
            "last_event_time": None,
        }

    def _should_process(self, file_path: str) -> bool:
        """Check if file should be processed based on filters."""
        # Custom filter takes precedence
        if self._filter_func is not None:
            return self._filter_func(file_path)

        # Empty extensions list means watch all files
        if not self._extensions:
            return True

        # Case-insensitive suffix match against the normalized extensions
        return file_path.lower().endswith(tuple(self._extensions))

    def _is_debounced(self, file_path: str) -> bool:
        """
        Check if file was recently processed (within debounce window).

        Returns True when the event should be dropped. When it returns False
        the current time is recorded as the path's last-processed time.
        """
        current_time = time.time()
        last_time = self._last_processed.get(file_path, 0)

        if current_time - last_time <= self._debounce_seconds:
            return True

        # Update last processed time
        self._last_processed[file_path] = current_time

        # Evict the oldest entries to keep the cache bounded
        if len(self._last_processed) > self._max_cache_size:
            num_to_remove = self._max_cache_size // 10
            sorted_items = sorted(self._last_processed.items(), key=lambda x: x[1])
            for path, _ in sorted_items[:num_to_remove]:
                del self._last_processed[path]
            logger.debug(f"Cleaned up {num_to_remove} old entries from debounce cache")

        return False

    def _update_telemetry(self, event_type: str) -> None:
        """Update telemetry statistics."""
        self._telemetry[event_type] += 1
        self._telemetry["total_events"] += 1
        self._telemetry["last_event_time"] = time.time()

        # Log telemetry periodically
        if self._telemetry["total_events"] % 10 == 0:
            logger.debug(
                f"File Watch Telemetry: "
                f"Created: {self._telemetry['files_created']}, "
                f"Modified: {self._telemetry['files_modified']}, "
                f"Deleted: {self._telemetry['files_deleted']}, "
                f"Moved: {self._telemetry['files_moved']}, "
                f"Total: {self._telemetry['total_events']}"
            )

    def _invoke_callback(
        self,
        hook_name: str,
        telemetry_key: str,
        callback: Callable[..., None],
        *paths: str,
    ) -> None:
        """Run a user callback, count it on success, log (not raise) on failure."""
        try:
            callback(*paths)
            self._update_telemetry(telemetry_key)
        except Exception as e:
            logger.error(f"Error in {hook_name} callback for {paths[0]}: {e}")

    def on_created(self, event: FileSystemEvent) -> None:
        """Handle file creation."""
        if self._ignore_directories and event.is_directory:
            return

        if self._on_created and self._should_process(event.src_path):
            if not self._is_debounced(event.src_path):
                logger.debug(f"File created: {event.src_path}")
                self._invoke_callback(
                    "on_created", "files_created", self._on_created, event.src_path
                )

    def on_modified(self, event: FileSystemEvent) -> None:
        """Handle file modification."""
        if self._ignore_directories and event.is_directory:
            return

        if self._on_modified and self._should_process(event.src_path):
            if not self._is_debounced(event.src_path):
                logger.debug(f"File modified: {event.src_path}")
                self._invoke_callback(
                    "on_modified", "files_modified", self._on_modified, event.src_path
                )

    def on_deleted(self, event: FileSystemEvent) -> None:
        """Handle file deletion (not debounced)."""
        if self._ignore_directories and event.is_directory:
            return

        if self._on_deleted and self._should_process(event.src_path):
            logger.debug(f"File deleted: {event.src_path}")
            self._invoke_callback(
                "on_deleted", "files_deleted", self._on_deleted, event.src_path
            )
            # The path is gone whether or not the callback succeeded, so
            # always drop its debounce entry.
            self._last_processed.pop(event.src_path, None)

    def on_moved(self, event: FileSystemEvent) -> None:
        """Handle file move/rename (not debounced)."""
        if self._ignore_directories and event.is_directory:
            return

        src_path = event.src_path
        dest_path = getattr(event, "dest_path", None)

        if self._on_moved and dest_path:
            # Process if either source or destination matches filter
            if self._should_process(src_path) or self._should_process(dest_path):
                logger.debug(f"File moved: {src_path} -> {dest_path}")
                self._invoke_callback(
                    "on_moved", "files_moved", self._on_moved, src_path, dest_path
                )
                # The source path no longer exists regardless of the callback
                # outcome, so always drop its debounce entry.
                self._last_processed.pop(src_path, None)

    @property
    def telemetry(self) -> Dict[str, Any]:
        """Get a copy of the current telemetry statistics."""
        return self._telemetry.copy()

    def reset_telemetry(self) -> None:
        """Reset telemetry counters."""
        self._telemetry = self._new_telemetry()
402
-
403
-
404
class FileWatcher:
    """
    Convenience wrapper for watching a directory with FileChangeHandler.

    Combines Observer and FileChangeHandler for easy directory watching.
    Handles start/stop lifecycle and provides a clean API.

    Example:
        from gaia.utils import FileWatcher

        def process_intake(path: str):
            print(f"Processing intake form: {path}")

        watcher = FileWatcher(
            directory="./intake_forms",
            on_created=process_intake,
            extensions=[".pdf", ".png", ".jpg"],
        )

        watcher.start()
        # ... do work ...
        watcher.stop()

        # Or use as context manager:
        with FileWatcher("./data", on_created=process) as watcher:
            # watcher is running
            pass
        # watcher is stopped
    """

    def __init__(
        self,
        directory: Union[str, Path],
        on_created: Optional[EventCallback] = None,
        on_modified: Optional[EventCallback] = None,
        on_deleted: Optional[EventCallback] = None,
        on_moved: Optional[MoveCallback] = None,
        extensions: Optional[List[str]] = None,
        filter_func: Optional[FilterCallback] = None,
        debounce_seconds: float = 2.0,
        recursive: bool = False,
    ):
        """
        Initialize FileWatcher.

        Args:
            directory: Directory path to watch.
            on_created: Callback for file creation.
            on_modified: Callback for file modification.
            on_deleted: Callback for file deletion.
            on_moved: Callback for file move/rename.
            extensions: File extensions to watch. None uses defaults, [] watches all.
            filter_func: Custom filter predicate.
            debounce_seconds: Debounce time between processing same file.
            recursive: If True, watch subdirectories recursively.

        Raises:
            ImportError: If watchdog package is not installed.
            FileNotFoundError: If directory does not exist.
        """
        if not WATCHDOG_AVAILABLE:
            raise ImportError(
                "FileWatcher requires the 'watchdog' package.\n"
                "Install with: pip install 'watchdog>=2.1.0'\n"
                "Or: uv pip install -e '.[dev]'"
            )

        self._directory = Path(directory)
        if not self._directory.exists():
            raise FileNotFoundError(f"Directory does not exist: {directory}")

        self._recursive = recursive
        # Set only while an observer has been started successfully.
        self._observer: Optional[Observer] = None

        self._handler = FileChangeHandler(
            on_created=on_created,
            on_modified=on_modified,
            on_deleted=on_deleted,
            on_moved=on_moved,
            extensions=extensions,
            filter_func=filter_func,
            debounce_seconds=debounce_seconds,
        )

    def start(self) -> None:
        """
        Start watching the directory.

        Safe to call multiple times - will not start multiple observers.
        If scheduling or starting the observer fails, the exception is
        re-raised and the watcher is left stopped so start() can be retried.
        """
        if self._observer is not None:
            logger.warning("FileWatcher already running")
            return

        self._observer = Observer()
        try:
            self._observer.schedule(
                self._handler,
                str(self._directory),
                recursive=self._recursive,
            )
            self._observer.start()
        except Exception:
            # Do not keep a half-initialized observer: it would make later
            # start() calls report "already running" and make stop() join a
            # thread that was never started.
            self._observer = None
            raise
        logger.info(
            f"Started watching: {self._directory} (recursive={self._recursive})"
        )

    def stop(self) -> None:
        """
        Stop watching the directory.

        Safe to call multiple times.
        """
        if self._observer is None:
            return

        try:
            self._observer.stop()
            self._observer.join(timeout=5.0)
        finally:
            # Always forget the observer so the watcher can be restarted
            # even if shutting it down raised.
            self._observer = None
        logger.info(f"Stopped watching: {self._directory}")

    @property
    def is_running(self) -> bool:
        """True if watcher is currently running."""
        return self._observer is not None and self._observer.is_alive()

    @property
    def directory(self) -> Path:
        """Directory being watched."""
        return self._directory

    @property
    def telemetry(self) -> Dict[str, Any]:
        """Get telemetry from the handler."""
        return self._handler.telemetry

    def __enter__(self) -> "FileWatcher":
        """Context manager entry - starts watching."""
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Context manager exit - stops watching."""
        self.stop()
544
-
545
-
546
def check_watchdog_available() -> bool:
    """Report whether the optional watchdog package was imported successfully."""
    return WATCHDOG_AVAILABLE
549
-
550
-
551
class FileWatcherMixin:
    """
    Mixin that adds directory-watching helpers to GAIA agents.

    Keeps a list of the FileWatcher instances it creates so they can be
    inspected and stopped together via stop_all_watchers().

    Example:
        from gaia import Agent, FileWatcherMixin

        class IntakeAgent(Agent, FileWatcherMixin):
            def __init__(self, **kwargs):
                super().__init__(**kwargs)

                self.watch_directory(
                    "./intake_forms",
                    on_created=self._process_form,
                    extensions=[".pdf", ".png"],
                )

            def _process_form(self, path: str):
                print(f"Processing: {path}")
    """

    # Created lazily on first use; the mixin defines no __init__.
    _watchers: List[FileWatcher]

    def watch_directory(
        self,
        directory: Union[str, Path],
        on_created: Optional[EventCallback] = None,
        on_modified: Optional[EventCallback] = None,
        on_deleted: Optional[EventCallback] = None,
        on_moved: Optional[MoveCallback] = None,
        extensions: Optional[List[str]] = None,
        filter_func: Optional[FilterCallback] = None,
        debounce_seconds: float = 2.0,
        recursive: bool = False,
        auto_start: bool = True,
    ) -> FileWatcher:
        """
        Create a FileWatcher for a directory and register it on this object.

        Args:
            directory: Directory path to watch.
            on_created: Callback for file creation.
            on_modified: Callback for file modification.
            on_deleted: Callback for file deletion.
            on_moved: Callback for file move/rename.
            extensions: File extensions to watch. None uses defaults, [] watches all.
            filter_func: Custom filter predicate.
            debounce_seconds: Debounce time between processing same file.
            recursive: If True, watch subdirectories recursively.
            auto_start: If True, start watching immediately.

        Returns:
            The FileWatcher instance.

        Example:
            self.watch_directory(
                "./data",
                on_created=self.handle_new_file,
                extensions=[".pdf", ".txt"],
            )
        """
        # Make sure the registry exists before building the watcher.
        if not hasattr(self, "_watchers"):
            self._watchers = []

        new_watcher = FileWatcher(
            directory=directory,
            on_created=on_created,
            on_modified=on_modified,
            on_deleted=on_deleted,
            on_moved=on_moved,
            extensions=extensions,
            filter_func=filter_func,
            debounce_seconds=debounce_seconds,
            recursive=recursive,
        )
        self._watchers.append(new_watcher)

        if auto_start:
            new_watcher.start()
        return new_watcher

    def stop_all_watchers(self) -> None:
        """Stop every registered file watcher (no-op if none were ever created)."""
        if not hasattr(self, "_watchers"):
            return

        for registered in self._watchers:
            registered.stop()
        logger.info(f"Stopped {len(self._watchers)} file watcher(s)")

    @property
    def watchers(self) -> List[FileWatcher]:
        """All registered file watchers; creates the empty list on first access."""
        if not hasattr(self, "_watchers"):
            self._watchers = []
        return self._watchers

    @property
    def watching_directories(self) -> List[Path]:
        """Directories of the watchers that are currently running."""
        return [entry.directory for entry in self.watchers if entry.is_running]

    @property
    def watcher_telemetry(self) -> Dict[str, Any]:
        """Telemetry counters summed over all watchers, plus watcher counts."""
        counter_keys = (
            "files_created",
            "files_modified",
            "files_deleted",
            "files_moved",
            "total_events",
        )
        registered = self.watchers

        totals: Dict[str, Any] = {key: 0 for key in counter_keys}
        totals["watcher_count"] = len(registered)
        totals["active_count"] = sum(1 for entry in registered if entry.is_running)

        for entry in registered:
            stats = entry.telemetry
            for key in counter_keys:
                totals[key] += stats.get(key, 0)
        return totals
1
+ # Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
2
+ # SPDX-License-Identifier: MIT
3
+
4
+ """
5
+ Generic file watching utilities for GAIA agents.
6
+
7
+ Provides FileChangeHandler and FileWatcher for monitoring directories
8
+ and responding to file system events with callbacks.
9
+
10
+ Also provides file hashing utilities for duplicate detection.
11
+
12
+ Example:
13
+ from gaia.utils import FileChangeHandler, FileWatcher, compute_file_hash
14
+
15
+ def on_new_file(path: str):
16
+ print(f"New file: {path}")
17
+ file_hash = compute_file_hash(path)
18
+ print(f"Hash: {file_hash}")
19
+
20
+ watcher = FileWatcher(
21
+ directory="./data",
22
+ on_created=on_new_file,
23
+ extensions=[".pdf", ".txt"],
24
+ )
25
+ watcher.start()
26
+ """
27
+
28
+ import hashlib
29
+ import logging
30
+ import time
31
+ from pathlib import Path
32
+ from typing import Any, Callable, Dict, List, Optional, Set, Union
33
+
34
+ try:
35
+ from watchdog.events import FileSystemEvent, FileSystemEventHandler
36
+ from watchdog.observers import Observer
37
+
38
+ WATCHDOG_AVAILABLE = True
39
+ except ImportError:
40
+ # Create dummy base class when watchdog is not available
41
+ class FileSystemEventHandler:
42
+ """Dummy base class when watchdog is not installed."""
43
+
44
+ class FileSystemEvent:
45
+ """Dummy event class when watchdog is not installed."""
46
+
47
+ src_path: str = ""
48
+ dest_path: str = ""
49
+ is_directory: bool = False
50
+
51
+ Observer = None
52
+ WATCHDOG_AVAILABLE = False
53
+
54
+ logger = logging.getLogger(__name__)
55
+
56
+ # Type alias for event callbacks
57
+ EventCallback = Callable[[str], None]
58
+ MoveCallback = Callable[[str, str], None] # (src_path, dest_path)
59
+ FilterCallback = Callable[[str], bool]
60
+
61
+ # Default chunk size for file hashing (64KB)
62
+ HASH_CHUNK_SIZE = 65536
63
+
64
+
65
def compute_file_hash(
    path: Union[str, Path],
    algorithm: str = "sha256",
    chunk_size: int = HASH_CHUNK_SIZE,
) -> Optional[str]:
    """
    Compute a hash of a file's contents.

    The file is read in chunks so that large files are never loaded into
    memory in one piece.

    Args:
        path: Path to the file to hash.
        algorithm: Name of a hashlib algorithm (default: sha256).
            Algorithms whose ``hexdigest()`` requires an explicit length
            (the ``shake_*`` family) are not supported and yield None.
        chunk_size: Number of bytes to read at a time (default: 64KB).
            None or a non-positive value falls back to HASH_CHUNK_SIZE.

    Returns:
        Hex-encoded hash string, or None if the path is not a regular file,
        the file cannot be read, or the algorithm cannot produce a digest.

    Example:
        from gaia.utils import compute_file_hash

        # Check if file was already processed
        file_hash = compute_file_hash("intake_form.pdf")
        if file_hash in processed_hashes:
            print("Already processed")
        else:
            process_file("intake_form.pdf")
            processed_hashes.add(file_hash)
    """
    # A chunk size of 0 makes the very first read() return b"", which would
    # end the loop immediately and yield the digest of *empty* input for a
    # non-empty file. Fall back to the default instead of hashing nothing.
    if chunk_size is None or chunk_size <= 0:
        chunk_size = HASH_CHUNK_SIZE

    try:
        file_path = Path(path)
        # is_file() is already False for paths that do not exist.
        if not file_path.is_file():
            return None

        hasher = hashlib.new(algorithm)
        with open(file_path, "rb") as f:
            while chunk := f.read(chunk_size):
                hasher.update(chunk)
    except (OSError, ValueError) as e:
        # OSError: unreadable file; ValueError: unknown algorithm name.
        logger.warning("Could not compute hash for %s: %s", path, e)
        return None

    try:
        return hasher.hexdigest()
    except TypeError as e:
        # Extendable-output algorithms (shake_128 / shake_256) need a length
        # argument for hexdigest(); report them like any other failure
        # rather than letting the TypeError escape.
        logger.warning("Could not compute hash for %s: %s", path, e)
        return None
109
+
110
+
111
def compute_bytes_hash(
    data: bytes,
    algorithm: str = "sha256",
) -> str:
    """
    Hash an in-memory bytes payload.

    Handy when the content has already been read and re-opening the file
    just to hash it would be wasteful.

    Args:
        data: The bytes to digest.
        algorithm: Name of the hashlib algorithm (default: sha256).

    Returns:
        The digest as a hex-encoded string.

    Example:
        from gaia.utils import compute_bytes_hash

        with open("file.pdf", "rb") as f:
            content = f.read()
        file_hash = compute_bytes_hash(content)
    """
    digest = hashlib.new(algorithm)
    digest.update(data)
    hex_value = digest.hexdigest()
    return hex_value
137
+
138
+
139
class FileChangeHandler(FileSystemEventHandler):
    """
    Generic handler for file system events.

    A flexible, callback-based file system event handler that can be used
    with any agent or application. Supports:
    - Callbacks for created, modified, deleted, and moved events
    - File extension filtering
    - Custom filter predicates
    - Debouncing to prevent duplicate events
    - Telemetry tracking

    Exceptions raised by a callback are logged (with traceback) and
    swallowed so that one failing callback does not propagate out of the
    event handler.

    Example:
        from gaia.utils import FileChangeHandler
        from watchdog.observers import Observer

        def handle_new_file(path: str):
            print(f"Processing: {path}")

        handler = FileChangeHandler(
            on_created=handle_new_file,
            extensions=[".pdf", ".png", ".jpg"],
            debounce_seconds=2.0,
        )

        observer = Observer()
        observer.schedule(handler, "./intake_forms", recursive=False)
        observer.start()
    """

    # Default extensions for document processing
    DEFAULT_EXTENSIONS: List[str] = [
        ".pdf",
        ".txt",
        ".md",
        ".markdown",
        ".csv",
        ".json",
        ".py",
        ".js",
        ".ts",
        ".java",
        ".cpp",
        ".c",
        ".html",
        ".css",
        ".yaml",
        ".yml",
        ".xml",
        ".rst",
        ".log",
    ]

    def __init__(
        self,
        on_created: Optional[EventCallback] = None,
        on_modified: Optional[EventCallback] = None,
        on_deleted: Optional[EventCallback] = None,
        on_moved: Optional[MoveCallback] = None,
        extensions: Optional[List[str]] = None,
        filter_func: Optional[FilterCallback] = None,
        debounce_seconds: float = 2.0,
        ignore_directories: bool = True,
    ):
        """
        Initialize FileChangeHandler.

        Args:
            on_created: Callback for file creation. Receives file path.
            on_modified: Callback for file modification. Receives file path.
            on_deleted: Callback for file deletion. Receives file path.
            on_moved: Callback for file move/rename. Receives (src_path, dest_path).
            extensions: List of file extensions to watch (e.g., [".pdf", ".txt"]).
                If None, uses DEFAULT_EXTENSIONS.
                If empty list [], watches all files.
                Entries are lower-cased and given a leading dot if missing.
            filter_func: Custom filter function. If provided, called with the
                event's file path and should return True to process the event.
                Takes precedence over extensions filter.
            debounce_seconds: Minimum time between processing same file.
                Applies to created and modified events.
            ignore_directories: If True, ignores directory events.

        Example:
            # Watch only PDFs and images
            handler = FileChangeHandler(
                on_created=process_file,
                extensions=[".pdf", ".png", ".jpg"],
            )

            # Watch all files with custom filter
            handler = FileChangeHandler(
                on_created=process_file,
                extensions=[],  # Watch all
                # The filter receives the event path, so test the file name,
                # not the whole path, to exclude hidden files.
                filter_func=lambda p: not Path(p).name.startswith("."),
            )
        """
        super().__init__()
        self._on_created = on_created
        self._on_modified = on_modified
        self._on_deleted = on_deleted
        self._on_moved = on_moved

        # Set up extensions filter
        if extensions is None:
            self._extensions: Set[str] = set(self.DEFAULT_EXTENSIONS)
        else:
            # Normalize extensions to lowercase with leading dot
            self._extensions = {
                ext.lower() if ext.startswith(".") else f".{ext.lower()}"
                for ext in extensions
            }

        self._filter_func = filter_func
        self._debounce_seconds = debounce_seconds
        self._ignore_directories = ignore_directories

        # Debounce tracking: path -> timestamp of the last accepted event
        self._last_processed: Dict[str, float] = {}
        self._max_cache_size = 1000

        # Telemetry
        self._telemetry: Dict[str, Any] = self._new_telemetry()

    @staticmethod
    def _new_telemetry() -> Dict[str, Any]:
        """Return a zeroed telemetry dict (shared by __init__ and reset)."""
        return {
            "files_created": 0,
            "files_modified": 0,
            "files_deleted": 0,
            "files_moved": 0,
            "total_events": 0,
            "last_event_time": None,
        }

    def _should_process(self, file_path: str) -> bool:
        """Check if file should be processed based on filters."""
        # Custom filter takes precedence
        if self._filter_func is not None:
            return self._filter_func(file_path)

        # Empty extensions list means watch all files
        if not self._extensions:
            return True

        # str.endswith accepts a tuple of suffixes: one call covers them all.
        return file_path.lower().endswith(tuple(self._extensions))

    def _is_debounced(self, file_path: str) -> bool:
        """
        Check if file was recently processed (within debounce window).

        Side effect: when the file is NOT debounced, the current time is
        recorded as its last-processed time, so call this only for events
        that will actually be dispatched.
        """
        current_time = time.time()
        last_time = self._last_processed.get(file_path, 0)

        if current_time - last_time <= self._debounce_seconds:
            return True

        # Update last processed time
        self._last_processed[file_path] = current_time

        # Evict the oldest 10% of entries once the cache grows past its cap,
        # so a long-running watcher does not accumulate paths forever.
        if len(self._last_processed) > self._max_cache_size:
            num_to_remove = self._max_cache_size // 10
            sorted_items = sorted(self._last_processed.items(), key=lambda x: x[1])
            for path, _ in sorted_items[:num_to_remove]:
                del self._last_processed[path]
            logger.debug(f"Cleaned up {num_to_remove} old entries from debounce cache")

        return False

    def _update_telemetry(self, event_type: str) -> None:
        """Update telemetry statistics."""
        self._telemetry[event_type] += 1
        self._telemetry["total_events"] += 1
        self._telemetry["last_event_time"] = time.time()

        # Log telemetry periodically
        if self._telemetry["total_events"] % 10 == 0:
            logger.debug(
                f"File Watch Telemetry: "
                f"Created: {self._telemetry['files_created']}, "
                f"Modified: {self._telemetry['files_modified']}, "
                f"Deleted: {self._telemetry['files_deleted']}, "
                f"Moved: {self._telemetry['files_moved']}, "
                f"Total: {self._telemetry['total_events']}"
            )

    def _invoke(
        self,
        callback_name: str,
        telemetry_key: str,
        callback: Callable[..., None],
        *paths: str,
    ) -> None:
        """Run a user callback, count it on success, log (not raise) on failure."""
        try:
            callback(*paths)
            self._update_telemetry(telemetry_key)
        except Exception as e:
            # Deliberately broad: this is the boundary between user code and
            # the handler. exc_info keeps the traceback for debugging.
            logger.error(
                "Error in %s callback for %s: %s",
                callback_name,
                paths[0],
                e,
                exc_info=True,
            )

    def on_created(self, event: FileSystemEvent) -> None:
        """Handle file creation."""
        if self._ignore_directories and event.is_directory:
            return

        path = event.src_path
        if not self._on_created or not self._should_process(path):
            return
        # Checked last because _is_debounced records the timestamp.
        if self._is_debounced(path):
            return

        logger.debug(f"File created: {path}")
        self._invoke("on_created", "files_created", self._on_created, path)

    def on_modified(self, event: FileSystemEvent) -> None:
        """Handle file modification."""
        if self._ignore_directories and event.is_directory:
            return

        path = event.src_path
        if not self._on_modified or not self._should_process(path):
            return
        # Checked last because _is_debounced records the timestamp.
        if self._is_debounced(path):
            return

        logger.debug(f"File modified: {path}")
        self._invoke("on_modified", "files_modified", self._on_modified, path)

    def on_deleted(self, event: FileSystemEvent) -> None:
        """Handle file deletion (not debounced)."""
        if self._ignore_directories and event.is_directory:
            return

        path = event.src_path
        # The file is gone, so its debounce timestamp is stale whether or not
        # a deletion callback is registered or succeeds. Dropping it here
        # keeps a quick re-creation of the same path from being suppressed.
        self._last_processed.pop(path, None)

        if self._on_deleted and self._should_process(path):
            logger.debug(f"File deleted: {path}")
            self._invoke("on_deleted", "files_deleted", self._on_deleted, path)

    def on_moved(self, event: FileSystemEvent) -> None:
        """Handle file move/rename (not debounced)."""
        if self._ignore_directories and event.is_directory:
            return

        src_path = event.src_path
        dest_path = getattr(event, "dest_path", None)

        # The source path no longer exists after a move; forget its debounce
        # timestamp independently of the callback outcome.
        self._last_processed.pop(src_path, None)

        if self._on_moved and dest_path:
            # Process if either source or destination matches filter
            if self._should_process(src_path) or self._should_process(dest_path):
                logger.debug(f"File moved: {src_path} -> {dest_path}")
                self._invoke(
                    "on_moved", "files_moved", self._on_moved, src_path, dest_path
                )

    @property
    def telemetry(self) -> Dict[str, Any]:
        """Get a copy of the current telemetry statistics."""
        return self._telemetry.copy()

    def reset_telemetry(self) -> None:
        """Reset telemetry counters."""
        self._telemetry = self._new_telemetry()
402
+
403
+
404
class FileWatcher:
    """
    Convenience wrapper for watching a directory with FileChangeHandler.

    Combines Observer and FileChangeHandler for easy directory watching.
    Handles start/stop lifecycle and provides a clean API.

    Example:
        from gaia.utils import FileWatcher

        def process_intake(path: str):
            print(f"Processing intake form: {path}")

        watcher = FileWatcher(
            directory="./intake_forms",
            on_created=process_intake,
            extensions=[".pdf", ".png", ".jpg"],
        )

        watcher.start()
        # ... do work ...
        watcher.stop()

        # Or use as context manager:
        with FileWatcher("./data", on_created=process) as watcher:
            # watcher is running
            pass
        # watcher is stopped
    """

    def __init__(
        self,
        directory: Union[str, Path],
        on_created: Optional[EventCallback] = None,
        on_modified: Optional[EventCallback] = None,
        on_deleted: Optional[EventCallback] = None,
        on_moved: Optional[MoveCallback] = None,
        extensions: Optional[List[str]] = None,
        filter_func: Optional[FilterCallback] = None,
        debounce_seconds: float = 2.0,
        recursive: bool = False,
    ):
        """
        Initialize FileWatcher.

        The watcher is created stopped; call start() (or use it as a context
        manager) to begin receiving events.

        Args:
            directory: Directory path to watch.
            on_created: Callback for file creation.
            on_modified: Callback for file modification.
            on_deleted: Callback for file deletion.
            on_moved: Callback for file move/rename.
            extensions: File extensions to watch. None uses defaults, [] watches all.
            filter_func: Custom filter predicate.
            debounce_seconds: Debounce time between processing same file.
            recursive: If True, watch subdirectories recursively.

        Raises:
            ImportError: If watchdog package is not installed.
            FileNotFoundError: If directory does not exist.
        """
        if not WATCHDOG_AVAILABLE:
            raise ImportError(
                "FileWatcher requires the 'watchdog' package.\n"
                "Install with: pip install 'watchdog>=2.1.0'\n"
                "Or: uv pip install -e '.[dev]'"
            )

        self._directory = Path(directory)
        if not self._directory.exists():
            raise FileNotFoundError(f"Directory does not exist: {directory}")

        self._recursive = recursive
        # Set only while an observer has been started successfully.
        self._observer: Optional[Observer] = None

        self._handler = FileChangeHandler(
            on_created=on_created,
            on_modified=on_modified,
            on_deleted=on_deleted,
            on_moved=on_moved,
            extensions=extensions,
            filter_func=filter_func,
            debounce_seconds=debounce_seconds,
        )

    def start(self) -> None:
        """
        Start watching the directory.

        Safe to call multiple times - will not start multiple observers.
        If scheduling or starting the observer fails, the exception
        propagates and the watcher is left stopped so start() can be retried.
        """
        if self._observer is not None:
            logger.warning("FileWatcher already running")
            return

        # Build the observer in a local first. Storing it on self before it
        # is running would make a failed start look like "already running"
        # and make stop() join a thread that was never started.
        observer = Observer()
        try:
            observer.schedule(
                self._handler,
                str(self._directory),
                recursive=self._recursive,
            )
            observer.start()
        except Exception:
            # Best-effort release of whatever was set up before the failure;
            # the original error is the one the caller needs to see.
            try:
                observer.stop()
            except Exception as cleanup_error:
                logger.debug(
                    "Cleanup after failed start of %s raised: %s",
                    self._directory,
                    cleanup_error,
                )
            raise

        self._observer = observer
        logger.info(
            f"Started watching: {self._directory} " f"(recursive={self._recursive})"
        )

    def stop(self) -> None:
        """
        Stop watching the directory.

        Safe to call multiple times. Waits up to 5 seconds for the observer
        thread to finish.
        """
        observer = self._observer
        if observer is None:
            return

        try:
            observer.stop()
            observer.join(timeout=5.0)
        finally:
            # Always drop the reference so a failure while stopping does not
            # leave the watcher permanently stuck in the "running" state.
            self._observer = None
        logger.info(f"Stopped watching: {self._directory}")

    @property
    def is_running(self) -> bool:
        """True if watcher is currently running."""
        return self._observer is not None and self._observer.is_alive()

    @property
    def directory(self) -> Path:
        """Directory being watched."""
        return self._directory

    @property
    def telemetry(self) -> Dict[str, Any]:
        """Get telemetry from the handler."""
        return self._handler.telemetry

    def __enter__(self) -> "FileWatcher":
        """Context manager entry - starts watching."""
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Context manager exit - stops watching."""
        self.stop()
544
+
545
+
546
def check_watchdog_available() -> bool:
    """Report the module-level flag recording whether watchdog was imported."""
    available: bool = WATCHDOG_AVAILABLE
    return available
549
+
550
+
551
class FileWatcherMixin:
    """
    Mixin providing file watching capabilities for GAIA agents.

    Keeps track of every FileWatcher created through watch_directory() so
    they can be inspected and stopped together via stop_all_watchers().
    The list is created lazily, so the mixin does not require its own
    __init__ to be called.

    Example:
        from gaia import Agent, FileWatcherMixin

        class IntakeAgent(Agent, FileWatcherMixin):
            def __init__(self, **kwargs):
                super().__init__(**kwargs)

                self.watch_directory(
                    "./intake_forms",
                    on_created=self._process_form,
                    extensions=[".pdf", ".png"],
                )

            def _process_form(self, path: str):
                print(f"Processing: {path}")
    """

    # Created on first use; see the `watchers` property.
    _watchers: List[FileWatcher]

    def watch_directory(
        self,
        directory: Union[str, Path],
        on_created: Optional[EventCallback] = None,
        on_modified: Optional[EventCallback] = None,
        on_deleted: Optional[EventCallback] = None,
        on_moved: Optional[MoveCallback] = None,
        extensions: Optional[List[str]] = None,
        filter_func: Optional[FilterCallback] = None,
        debounce_seconds: float = 2.0,
        recursive: bool = False,
        auto_start: bool = True,
    ) -> FileWatcher:
        """
        Watch a directory for file changes.

        Args:
            directory: Directory path to watch.
            on_created: Callback for file creation.
            on_modified: Callback for file modification.
            on_deleted: Callback for file deletion.
            on_moved: Callback for file move/rename.
            extensions: File extensions to watch. None uses defaults, [] watches all.
            filter_func: Custom filter predicate.
            debounce_seconds: Debounce time between processing same file.
            recursive: If True, watch subdirectories recursively.
            auto_start: If True, start watching immediately.

        Returns:
            The FileWatcher instance (also recorded in `watchers`).

        Raises:
            ImportError: If watchdog package is not installed.
            FileNotFoundError: If directory does not exist.

        Example:
            self.watch_directory(
                "./data",
                on_created=self.handle_new_file,
                extensions=[".pdf", ".txt"],
            )
        """
        # Initialize watchers list if needed
        if not hasattr(self, "_watchers"):
            self._watchers = []

        watcher = FileWatcher(
            directory=directory,
            on_created=on_created,
            on_modified=on_modified,
            on_deleted=on_deleted,
            on_moved=on_moved,
            extensions=extensions,
            filter_func=filter_func,
            debounce_seconds=debounce_seconds,
            recursive=recursive,
        )

        self._watchers.append(watcher)

        if auto_start:
            watcher.start()

        return watcher

    def stop_all_watchers(self) -> None:
        """
        Stop all file watchers.

        Every watcher is attempted even if an earlier one fails to stop;
        failures are logged rather than raised so that cleanup always runs
        to completion. Stopped watchers stay in `watchers`.
        """
        tracked = getattr(self, "_watchers", None)
        if tracked is None:
            return

        stopped = 0
        for watcher in tracked:
            try:
                watcher.stop()
                stopped += 1
            except Exception as e:
                # One misbehaving watcher must not leave the others running.
                logger.error(
                    "Failed to stop watcher for %s: %s",
                    watcher.directory,
                    e,
                    exc_info=True,
                )
        logger.info(f"Stopped {stopped} file watcher(s)")

    @property
    def watchers(self) -> List[FileWatcher]:
        """All watchers created via watch_directory(), running or stopped."""
        if not hasattr(self, "_watchers"):
            self._watchers = []
        return self._watchers

    @property
    def watching_directories(self) -> List[Path]:
        """List of directories whose watcher is currently running."""
        return [w.directory for w in self.watchers if w.is_running]

    @property
    def watcher_telemetry(self) -> Dict[str, Any]:
        """
        Combined telemetry from all watchers.

        Sums the per-watcher event counters and adds `watcher_count`
        (all tracked watchers) and `active_count` (those running now).
        """
        counter_keys = (
            "files_created",
            "files_modified",
            "files_deleted",
            "files_moved",
            "total_events",
        )
        combined: Dict[str, Any] = {key: 0 for key in counter_keys}
        combined["watcher_count"] = len(self.watchers)
        combined["active_count"] = sum(1 for w in self.watchers if w.is_running)

        for watcher in self.watchers:
            t = watcher.telemetry
            for key in counter_keys:
                combined[key] += t.get(key, 0)
        return combined