amd-gaia 0.15.0__py3-none-any.whl → 0.15.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/METADATA +222 -223
  2. amd_gaia-0.15.2.dist-info/RECORD +182 -0
  3. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/WHEEL +1 -1
  4. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/entry_points.txt +1 -0
  5. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/licenses/LICENSE.md +20 -20
  6. gaia/__init__.py +29 -29
  7. gaia/agents/__init__.py +19 -19
  8. gaia/agents/base/__init__.py +9 -9
  9. gaia/agents/base/agent.py +2132 -2177
  10. gaia/agents/base/api_agent.py +119 -120
  11. gaia/agents/base/console.py +1967 -1841
  12. gaia/agents/base/errors.py +237 -237
  13. gaia/agents/base/mcp_agent.py +86 -86
  14. gaia/agents/base/tools.py +88 -83
  15. gaia/agents/blender/__init__.py +7 -0
  16. gaia/agents/blender/agent.py +553 -556
  17. gaia/agents/blender/agent_simple.py +133 -135
  18. gaia/agents/blender/app.py +211 -211
  19. gaia/agents/blender/app_simple.py +41 -41
  20. gaia/agents/blender/core/__init__.py +16 -16
  21. gaia/agents/blender/core/materials.py +506 -506
  22. gaia/agents/blender/core/objects.py +316 -316
  23. gaia/agents/blender/core/rendering.py +225 -225
  24. gaia/agents/blender/core/scene.py +220 -220
  25. gaia/agents/blender/core/view.py +146 -146
  26. gaia/agents/chat/__init__.py +9 -9
  27. gaia/agents/chat/agent.py +809 -835
  28. gaia/agents/chat/app.py +1065 -1058
  29. gaia/agents/chat/session.py +508 -508
  30. gaia/agents/chat/tools/__init__.py +15 -15
  31. gaia/agents/chat/tools/file_tools.py +96 -96
  32. gaia/agents/chat/tools/rag_tools.py +1744 -1729
  33. gaia/agents/chat/tools/shell_tools.py +437 -436
  34. gaia/agents/code/__init__.py +7 -7
  35. gaia/agents/code/agent.py +549 -549
  36. gaia/agents/code/cli.py +377 -0
  37. gaia/agents/code/models.py +135 -135
  38. gaia/agents/code/orchestration/__init__.py +24 -24
  39. gaia/agents/code/orchestration/checklist_executor.py +1763 -1763
  40. gaia/agents/code/orchestration/checklist_generator.py +713 -713
  41. gaia/agents/code/orchestration/factories/__init__.py +9 -9
  42. gaia/agents/code/orchestration/factories/base.py +63 -63
  43. gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -118
  44. gaia/agents/code/orchestration/factories/python_factory.py +106 -106
  45. gaia/agents/code/orchestration/orchestrator.py +841 -841
  46. gaia/agents/code/orchestration/project_analyzer.py +391 -391
  47. gaia/agents/code/orchestration/steps/__init__.py +67 -67
  48. gaia/agents/code/orchestration/steps/base.py +188 -188
  49. gaia/agents/code/orchestration/steps/error_handler.py +314 -314
  50. gaia/agents/code/orchestration/steps/nextjs.py +828 -828
  51. gaia/agents/code/orchestration/steps/python.py +307 -307
  52. gaia/agents/code/orchestration/template_catalog.py +469 -469
  53. gaia/agents/code/orchestration/workflows/__init__.py +14 -14
  54. gaia/agents/code/orchestration/workflows/base.py +80 -80
  55. gaia/agents/code/orchestration/workflows/nextjs.py +186 -186
  56. gaia/agents/code/orchestration/workflows/python.py +94 -94
  57. gaia/agents/code/prompts/__init__.py +11 -11
  58. gaia/agents/code/prompts/base_prompt.py +77 -77
  59. gaia/agents/code/prompts/code_patterns.py +2034 -2036
  60. gaia/agents/code/prompts/nextjs_prompt.py +40 -40
  61. gaia/agents/code/prompts/python_prompt.py +109 -109
  62. gaia/agents/code/schema_inference.py +365 -365
  63. gaia/agents/code/system_prompt.py +41 -41
  64. gaia/agents/code/tools/__init__.py +42 -42
  65. gaia/agents/code/tools/cli_tools.py +1138 -1138
  66. gaia/agents/code/tools/code_formatting.py +319 -319
  67. gaia/agents/code/tools/code_tools.py +769 -769
  68. gaia/agents/code/tools/error_fixing.py +1347 -1347
  69. gaia/agents/code/tools/external_tools.py +180 -180
  70. gaia/agents/code/tools/file_io.py +845 -845
  71. gaia/agents/code/tools/prisma_tools.py +190 -190
  72. gaia/agents/code/tools/project_management.py +1016 -1016
  73. gaia/agents/code/tools/testing.py +321 -321
  74. gaia/agents/code/tools/typescript_tools.py +122 -122
  75. gaia/agents/code/tools/validation_parsing.py +461 -461
  76. gaia/agents/code/tools/validation_tools.py +806 -806
  77. gaia/agents/code/tools/web_dev_tools.py +1758 -1758
  78. gaia/agents/code/validators/__init__.py +16 -16
  79. gaia/agents/code/validators/antipattern_checker.py +241 -241
  80. gaia/agents/code/validators/ast_analyzer.py +197 -197
  81. gaia/agents/code/validators/requirements_validator.py +145 -145
  82. gaia/agents/code/validators/syntax_validator.py +171 -171
  83. gaia/agents/docker/__init__.py +7 -7
  84. gaia/agents/docker/agent.py +643 -642
  85. gaia/agents/emr/__init__.py +8 -8
  86. gaia/agents/emr/agent.py +1504 -1506
  87. gaia/agents/emr/cli.py +1322 -1322
  88. gaia/agents/emr/constants.py +475 -475
  89. gaia/agents/emr/dashboard/__init__.py +4 -4
  90. gaia/agents/emr/dashboard/server.py +1972 -1974
  91. gaia/agents/jira/__init__.py +11 -11
  92. gaia/agents/jira/agent.py +894 -894
  93. gaia/agents/jira/jql_templates.py +299 -299
  94. gaia/agents/routing/__init__.py +7 -7
  95. gaia/agents/routing/agent.py +567 -570
  96. gaia/agents/routing/system_prompt.py +75 -75
  97. gaia/agents/summarize/__init__.py +11 -0
  98. gaia/agents/summarize/agent.py +885 -0
  99. gaia/agents/summarize/prompts.py +129 -0
  100. gaia/api/__init__.py +23 -23
  101. gaia/api/agent_registry.py +238 -238
  102. gaia/api/app.py +305 -305
  103. gaia/api/openai_server.py +575 -575
  104. gaia/api/schemas.py +186 -186
  105. gaia/api/sse_handler.py +373 -373
  106. gaia/apps/__init__.py +4 -4
  107. gaia/apps/llm/__init__.py +6 -6
  108. gaia/apps/llm/app.py +184 -169
  109. gaia/apps/summarize/app.py +116 -633
  110. gaia/apps/summarize/html_viewer.py +133 -133
  111. gaia/apps/summarize/pdf_formatter.py +284 -284
  112. gaia/audio/__init__.py +2 -2
  113. gaia/audio/audio_client.py +439 -439
  114. gaia/audio/audio_recorder.py +269 -269
  115. gaia/audio/kokoro_tts.py +599 -599
  116. gaia/audio/whisper_asr.py +432 -432
  117. gaia/chat/__init__.py +16 -16
  118. gaia/chat/app.py +428 -430
  119. gaia/chat/prompts.py +522 -522
  120. gaia/chat/sdk.py +1228 -1225
  121. gaia/cli.py +5659 -5632
  122. gaia/database/__init__.py +10 -10
  123. gaia/database/agent.py +176 -176
  124. gaia/database/mixin.py +290 -290
  125. gaia/database/testing.py +64 -64
  126. gaia/eval/batch_experiment.py +2332 -2332
  127. gaia/eval/claude.py +542 -542
  128. gaia/eval/config.py +37 -37
  129. gaia/eval/email_generator.py +512 -512
  130. gaia/eval/eval.py +3179 -3179
  131. gaia/eval/groundtruth.py +1130 -1130
  132. gaia/eval/transcript_generator.py +582 -582
  133. gaia/eval/webapp/README.md +167 -167
  134. gaia/eval/webapp/package-lock.json +875 -875
  135. gaia/eval/webapp/package.json +20 -20
  136. gaia/eval/webapp/public/app.js +3402 -3402
  137. gaia/eval/webapp/public/index.html +87 -87
  138. gaia/eval/webapp/public/styles.css +3661 -3661
  139. gaia/eval/webapp/server.js +415 -415
  140. gaia/eval/webapp/test-setup.js +72 -72
  141. gaia/installer/__init__.py +23 -0
  142. gaia/installer/init_command.py +1275 -0
  143. gaia/installer/lemonade_installer.py +619 -0
  144. gaia/llm/__init__.py +10 -2
  145. gaia/llm/base_client.py +60 -0
  146. gaia/llm/exceptions.py +12 -0
  147. gaia/llm/factory.py +70 -0
  148. gaia/llm/lemonade_client.py +3421 -3221
  149. gaia/llm/lemonade_manager.py +294 -294
  150. gaia/llm/providers/__init__.py +9 -0
  151. gaia/llm/providers/claude.py +108 -0
  152. gaia/llm/providers/lemonade.py +118 -0
  153. gaia/llm/providers/openai_provider.py +79 -0
  154. gaia/llm/vlm_client.py +382 -382
  155. gaia/logger.py +189 -189
  156. gaia/mcp/agent_mcp_server.py +245 -245
  157. gaia/mcp/blender_mcp_client.py +138 -138
  158. gaia/mcp/blender_mcp_server.py +648 -648
  159. gaia/mcp/context7_cache.py +332 -332
  160. gaia/mcp/external_services.py +518 -518
  161. gaia/mcp/mcp_bridge.py +811 -550
  162. gaia/mcp/servers/__init__.py +6 -6
  163. gaia/mcp/servers/docker_mcp.py +83 -83
  164. gaia/perf_analysis.py +361 -0
  165. gaia/rag/__init__.py +10 -10
  166. gaia/rag/app.py +293 -293
  167. gaia/rag/demo.py +304 -304
  168. gaia/rag/pdf_utils.py +235 -235
  169. gaia/rag/sdk.py +2194 -2194
  170. gaia/security.py +183 -163
  171. gaia/talk/app.py +287 -289
  172. gaia/talk/sdk.py +538 -538
  173. gaia/testing/__init__.py +87 -87
  174. gaia/testing/assertions.py +330 -330
  175. gaia/testing/fixtures.py +333 -333
  176. gaia/testing/mocks.py +493 -493
  177. gaia/util.py +46 -46
  178. gaia/utils/__init__.py +33 -33
  179. gaia/utils/file_watcher.py +675 -675
  180. gaia/utils/parsing.py +223 -223
  181. gaia/version.py +100 -100
  182. amd_gaia-0.15.0.dist-info/RECORD +0 -168
  183. gaia/agents/code/app.py +0 -266
  184. gaia/llm/llm_client.py +0 -723
  185. {amd_gaia-0.15.0.dist-info → amd_gaia-0.15.2.dist-info}/top_level.txt +0 -0
@@ -1,675 +1,675 @@
1
- # Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
2
- # SPDX-License-Identifier: MIT
3
-
4
- """
5
- Generic file watching utilities for GAIA agents.
6
-
7
- Provides FileChangeHandler and FileWatcher for monitoring directories
8
- and responding to file system events with callbacks.
9
-
10
- Also provides file hashing utilities for duplicate detection.
11
-
12
- Example:
13
- from gaia.utils import FileChangeHandler, FileWatcher, compute_file_hash
14
-
15
- def on_new_file(path: str):
16
- print(f"New file: {path}")
17
- file_hash = compute_file_hash(path)
18
- print(f"Hash: {file_hash}")
19
-
20
- watcher = FileWatcher(
21
- directory="./data",
22
- on_created=on_new_file,
23
- extensions=[".pdf", ".txt"],
24
- )
25
- watcher.start()
26
- """
27
-
28
- import hashlib
29
- import logging
30
- import time
31
- from pathlib import Path
32
- from typing import Any, Callable, Dict, List, Optional, Set, Union
33
-
34
- try:
35
- from watchdog.events import FileSystemEvent, FileSystemEventHandler
36
- from watchdog.observers import Observer
37
-
38
- WATCHDOG_AVAILABLE = True
39
- except ImportError:
40
- # Create dummy base class when watchdog is not available
41
- class FileSystemEventHandler:
42
- """Dummy base class when watchdog is not installed."""
43
-
44
- class FileSystemEvent:
45
- """Dummy event class when watchdog is not installed."""
46
-
47
- src_path: str = ""
48
- dest_path: str = ""
49
- is_directory: bool = False
50
-
51
- Observer = None
52
- WATCHDOG_AVAILABLE = False
53
-
54
- logger = logging.getLogger(__name__)
55
-
56
- # Type alias for event callbacks
57
- EventCallback = Callable[[str], None]
58
- MoveCallback = Callable[[str, str], None] # (src_path, dest_path)
59
- FilterCallback = Callable[[str], bool]
60
-
61
- # Default chunk size for file hashing (64KB)
62
- HASH_CHUNK_SIZE = 65536
63
-
64
-
65
- def compute_file_hash(
66
- path: Union[str, Path],
67
- algorithm: str = "sha256",
68
- chunk_size: int = HASH_CHUNK_SIZE,
69
- ) -> Optional[str]:
70
- """
71
- Compute a hash of a file's contents.
72
-
73
- Uses chunked reading to handle large files efficiently without
74
- loading the entire file into memory.
75
-
76
- Args:
77
- path: Path to the file to hash.
78
- algorithm: Hash algorithm to use (default: sha256).
79
- Supports any algorithm from hashlib.
80
- chunk_size: Size of chunks to read at a time (default: 64KB).
81
-
82
- Returns:
83
- Hex-encoded hash string, or None if file cannot be read.
84
-
85
- Example:
86
- from gaia.utils import compute_file_hash
87
-
88
- # Check if file was already processed
89
- file_hash = compute_file_hash("intake_form.pdf")
90
- if file_hash in processed_hashes:
91
- print("Already processed")
92
- else:
93
- process_file("intake_form.pdf")
94
- processed_hashes.add(file_hash)
95
- """
96
- try:
97
- file_path = Path(path)
98
- if not file_path.exists() or not file_path.is_file():
99
- return None
100
-
101
- hasher = hashlib.new(algorithm)
102
- with open(file_path, "rb") as f:
103
- while chunk := f.read(chunk_size):
104
- hasher.update(chunk)
105
- return hasher.hexdigest()
106
- except (OSError, IOError, ValueError) as e:
107
- logger.warning(f"Could not compute hash for {path}: {e}")
108
- return None
109
-
110
-
111
- def compute_bytes_hash(
112
- data: bytes,
113
- algorithm: str = "sha256",
114
- ) -> str:
115
- """
116
- Compute a hash of bytes data.
117
-
118
- Useful when the file content is already loaded in memory.
119
-
120
- Args:
121
- data: Bytes to hash.
122
- algorithm: Hash algorithm to use (default: sha256).
123
-
124
- Returns:
125
- Hex-encoded hash string.
126
-
127
- Example:
128
- from gaia.utils import compute_bytes_hash
129
-
130
- with open("file.pdf", "rb") as f:
131
- content = f.read()
132
- file_hash = compute_bytes_hash(content)
133
- """
134
- hasher = hashlib.new(algorithm)
135
- hasher.update(data)
136
- return hasher.hexdigest()
137
-
138
-
139
- class FileChangeHandler(FileSystemEventHandler):
140
- """
141
- Generic handler for file system events.
142
-
143
- A flexible, callback-based file system event handler that can be used
144
- with any agent or application. Supports:
145
- - Callbacks for created, modified, deleted, and moved events
146
- - File extension filtering
147
- - Custom filter predicates
148
- - Debouncing to prevent duplicate events
149
- - Telemetry tracking
150
-
151
- Example:
152
- from gaia.utils import FileChangeHandler
153
- from watchdog.observers import Observer
154
-
155
- def handle_new_file(path: str):
156
- print(f"Processing: {path}")
157
-
158
- handler = FileChangeHandler(
159
- on_created=handle_new_file,
160
- extensions=[".pdf", ".png", ".jpg"],
161
- debounce_seconds=2.0,
162
- )
163
-
164
- observer = Observer()
165
- observer.schedule(handler, "./intake_forms", recursive=False)
166
- observer.start()
167
- """
168
-
169
- # Default extensions for document processing
170
- DEFAULT_EXTENSIONS: List[str] = [
171
- ".pdf",
172
- ".txt",
173
- ".md",
174
- ".markdown",
175
- ".csv",
176
- ".json",
177
- ".py",
178
- ".js",
179
- ".ts",
180
- ".java",
181
- ".cpp",
182
- ".c",
183
- ".html",
184
- ".css",
185
- ".yaml",
186
- ".yml",
187
- ".xml",
188
- ".rst",
189
- ".log",
190
- ]
191
-
192
- def __init__(
193
- self,
194
- on_created: Optional[EventCallback] = None,
195
- on_modified: Optional[EventCallback] = None,
196
- on_deleted: Optional[EventCallback] = None,
197
- on_moved: Optional[MoveCallback] = None,
198
- extensions: Optional[List[str]] = None,
199
- filter_func: Optional[FilterCallback] = None,
200
- debounce_seconds: float = 2.0,
201
- ignore_directories: bool = True,
202
- ):
203
- """
204
- Initialize FileChangeHandler.
205
-
206
- Args:
207
- on_created: Callback for file creation. Receives file path.
208
- on_modified: Callback for file modification. Receives file path.
209
- on_deleted: Callback for file deletion. Receives file path.
210
- on_moved: Callback for file move/rename. Receives (src_path, dest_path).
211
- extensions: List of file extensions to watch (e.g., [".pdf", ".txt"]).
212
- If None, uses DEFAULT_EXTENSIONS.
213
- If empty list [], watches all files.
214
- filter_func: Custom filter function. If provided, called with file path
215
- and should return True to process the event.
216
- Takes precedence over extensions filter.
217
- debounce_seconds: Minimum time between processing same file.
218
- ignore_directories: If True, ignores directory events.
219
-
220
- Example:
221
- # Watch only PDFs and images
222
- handler = FileChangeHandler(
223
- on_created=process_file,
224
- extensions=[".pdf", ".png", ".jpg"],
225
- )
226
-
227
- # Watch all files with custom filter
228
- handler = FileChangeHandler(
229
- on_created=process_file,
230
- extensions=[], # Watch all
231
- filter_func=lambda p: not p.startswith("."), # Exclude hidden
232
- )
233
- """
234
- super().__init__()
235
- self._on_created = on_created
236
- self._on_modified = on_modified
237
- self._on_deleted = on_deleted
238
- self._on_moved = on_moved
239
-
240
- # Set up extensions filter
241
- if extensions is None:
242
- self._extensions: Set[str] = set(self.DEFAULT_EXTENSIONS)
243
- else:
244
- # Normalize extensions to lowercase with leading dot
245
- self._extensions = {
246
- ext.lower() if ext.startswith(".") else f".{ext.lower()}"
247
- for ext in extensions
248
- }
249
-
250
- self._filter_func = filter_func
251
- self._debounce_seconds = debounce_seconds
252
- self._ignore_directories = ignore_directories
253
-
254
- # Debounce tracking
255
- self._last_processed: Dict[str, float] = {}
256
- self._max_cache_size = 1000
257
-
258
- # Telemetry
259
- self._telemetry: Dict[str, Any] = {
260
- "files_created": 0,
261
- "files_modified": 0,
262
- "files_deleted": 0,
263
- "files_moved": 0,
264
- "total_events": 0,
265
- "last_event_time": None,
266
- }
267
-
268
- def _should_process(self, file_path: str) -> bool:
269
- """Check if file should be processed based on filters."""
270
- # Custom filter takes precedence
271
- if self._filter_func is not None:
272
- return self._filter_func(file_path)
273
-
274
- # Empty extensions list means watch all files
275
- if not self._extensions:
276
- return True
277
-
278
- # Check extension
279
- file_lower = file_path.lower()
280
- return any(file_lower.endswith(ext) for ext in self._extensions)
281
-
282
- def _is_debounced(self, file_path: str) -> bool:
283
- """Check if file was recently processed (within debounce window)."""
284
- current_time = time.time()
285
- last_time = self._last_processed.get(file_path, 0)
286
-
287
- if current_time - last_time <= self._debounce_seconds:
288
- return True
289
-
290
- # Update last processed time
291
- self._last_processed[file_path] = current_time
292
-
293
- # LRU cache eviction to prevent memory leaks
294
- if len(self._last_processed) > self._max_cache_size:
295
- num_to_remove = self._max_cache_size // 10
296
- sorted_items = sorted(self._last_processed.items(), key=lambda x: x[1])
297
- for path, _ in sorted_items[:num_to_remove]:
298
- del self._last_processed[path]
299
- logger.debug(f"Cleaned up {num_to_remove} old entries from debounce cache")
300
-
301
- return False
302
-
303
- def _update_telemetry(self, event_type: str) -> None:
304
- """Update telemetry statistics."""
305
- self._telemetry[event_type] += 1
306
- self._telemetry["total_events"] += 1
307
- self._telemetry["last_event_time"] = time.time()
308
-
309
- # Log telemetry periodically
310
- if self._telemetry["total_events"] % 10 == 0:
311
- logger.debug(
312
- f"File Watch Telemetry: "
313
- f"Created: {self._telemetry['files_created']}, "
314
- f"Modified: {self._telemetry['files_modified']}, "
315
- f"Deleted: {self._telemetry['files_deleted']}, "
316
- f"Moved: {self._telemetry['files_moved']}, "
317
- f"Total: {self._telemetry['total_events']}"
318
- )
319
-
320
- def on_created(self, event: FileSystemEvent) -> None:
321
- """Handle file creation."""
322
- if self._ignore_directories and event.is_directory:
323
- return
324
-
325
- if self._on_created and self._should_process(event.src_path):
326
- if not self._is_debounced(event.src_path):
327
- logger.debug(f"File created: {event.src_path}")
328
- try:
329
- self._on_created(event.src_path)
330
- self._update_telemetry("files_created")
331
- except Exception as e:
332
- logger.error(
333
- f"Error in on_created callback for {event.src_path}: {e}"
334
- )
335
-
336
- def on_modified(self, event: FileSystemEvent) -> None:
337
- """Handle file modification."""
338
- if self._ignore_directories and event.is_directory:
339
- return
340
-
341
- if self._on_modified and self._should_process(event.src_path):
342
- if not self._is_debounced(event.src_path):
343
- logger.debug(f"File modified: {event.src_path}")
344
- try:
345
- self._on_modified(event.src_path)
346
- self._update_telemetry("files_modified")
347
- except Exception as e:
348
- logger.error(
349
- f"Error in on_modified callback for {event.src_path}: {e}"
350
- )
351
-
352
- def on_deleted(self, event: FileSystemEvent) -> None:
353
- """Handle file deletion."""
354
- if self._ignore_directories and event.is_directory:
355
- return
356
-
357
- if self._on_deleted and self._should_process(event.src_path):
358
- logger.debug(f"File deleted: {event.src_path}")
359
- try:
360
- self._on_deleted(event.src_path)
361
- self._update_telemetry("files_deleted")
362
- # Clean up from debounce cache
363
- self._last_processed.pop(event.src_path, None)
364
- except Exception as e:
365
- logger.error(f"Error in on_deleted callback for {event.src_path}: {e}")
366
-
367
- def on_moved(self, event: FileSystemEvent) -> None:
368
- """Handle file move/rename."""
369
- if self._ignore_directories and event.is_directory:
370
- return
371
-
372
- src_path = event.src_path
373
- dest_path = getattr(event, "dest_path", None)
374
-
375
- if self._on_moved and dest_path:
376
- # Process if either source or destination matches filter
377
- if self._should_process(src_path) or self._should_process(dest_path):
378
- logger.debug(f"File moved: {src_path} -> {dest_path}")
379
- try:
380
- self._on_moved(src_path, dest_path)
381
- self._update_telemetry("files_moved")
382
- # Update debounce cache
383
- self._last_processed.pop(src_path, None)
384
- except Exception as e:
385
- logger.error(f"Error in on_moved callback for {src_path}: {e}")
386
-
387
- @property
388
- def telemetry(self) -> Dict[str, Any]:
389
- """Get current telemetry statistics."""
390
- return self._telemetry.copy()
391
-
392
- def reset_telemetry(self) -> None:
393
- """Reset telemetry counters."""
394
- self._telemetry = {
395
- "files_created": 0,
396
- "files_modified": 0,
397
- "files_deleted": 0,
398
- "files_moved": 0,
399
- "total_events": 0,
400
- "last_event_time": None,
401
- }
402
-
403
-
404
- class FileWatcher:
405
- """
406
- Convenience wrapper for watching a directory with FileChangeHandler.
407
-
408
- Combines Observer and FileChangeHandler for easy directory watching.
409
- Handles start/stop lifecycle and provides a clean API.
410
-
411
- Example:
412
- from gaia.utils import FileWatcher
413
-
414
- def process_intake(path: str):
415
- print(f"Processing intake form: {path}")
416
-
417
- watcher = FileWatcher(
418
- directory="./intake_forms",
419
- on_created=process_intake,
420
- extensions=[".pdf", ".png", ".jpg"],
421
- )
422
-
423
- watcher.start()
424
- # ... do work ...
425
- watcher.stop()
426
-
427
- # Or use as context manager:
428
- with FileWatcher("./data", on_created=process) as watcher:
429
- # watcher is running
430
- pass
431
- # watcher is stopped
432
- """
433
-
434
- def __init__(
435
- self,
436
- directory: Union[str, Path],
437
- on_created: Optional[EventCallback] = None,
438
- on_modified: Optional[EventCallback] = None,
439
- on_deleted: Optional[EventCallback] = None,
440
- on_moved: Optional[MoveCallback] = None,
441
- extensions: Optional[List[str]] = None,
442
- filter_func: Optional[FilterCallback] = None,
443
- debounce_seconds: float = 2.0,
444
- recursive: bool = False,
445
- ):
446
- """
447
- Initialize FileWatcher.
448
-
449
- Args:
450
- directory: Directory path to watch.
451
- on_created: Callback for file creation.
452
- on_modified: Callback for file modification.
453
- on_deleted: Callback for file deletion.
454
- on_moved: Callback for file move/rename.
455
- extensions: File extensions to watch. None uses defaults, [] watches all.
456
- filter_func: Custom filter predicate.
457
- debounce_seconds: Debounce time between processing same file.
458
- recursive: If True, watch subdirectories recursively.
459
-
460
- Raises:
461
- ImportError: If watchdog package is not installed.
462
- FileNotFoundError: If directory does not exist.
463
- """
464
- if not WATCHDOG_AVAILABLE:
465
- raise ImportError(
466
- "FileWatcher requires the 'watchdog' package.\n"
467
- "Install with: pip install 'watchdog>=2.1.0'\n"
468
- "Or: uv pip install -e '.[dev]'"
469
- )
470
-
471
- self._directory = Path(directory)
472
- if not self._directory.exists():
473
- raise FileNotFoundError(f"Directory does not exist: {directory}")
474
-
475
- self._recursive = recursive
476
- self._observer: Optional[Observer] = None
477
-
478
- self._handler = FileChangeHandler(
479
- on_created=on_created,
480
- on_modified=on_modified,
481
- on_deleted=on_deleted,
482
- on_moved=on_moved,
483
- extensions=extensions,
484
- filter_func=filter_func,
485
- debounce_seconds=debounce_seconds,
486
- )
487
-
488
- def start(self) -> None:
489
- """
490
- Start watching the directory.
491
-
492
- Safe to call multiple times - will not start multiple observers.
493
- """
494
- if self._observer is not None:
495
- logger.warning("FileWatcher already running")
496
- return
497
-
498
- self._observer = Observer()
499
- self._observer.schedule(
500
- self._handler,
501
- str(self._directory),
502
- recursive=self._recursive,
503
- )
504
- self._observer.start()
505
- logger.info(
506
- f"Started watching: {self._directory} " f"(recursive={self._recursive})"
507
- )
508
-
509
- def stop(self) -> None:
510
- """
511
- Stop watching the directory.
512
-
513
- Safe to call multiple times.
514
- """
515
- if self._observer is not None:
516
- self._observer.stop()
517
- self._observer.join(timeout=5.0)
518
- self._observer = None
519
- logger.info(f"Stopped watching: {self._directory}")
520
-
521
- @property
522
- def is_running(self) -> bool:
523
- """True if watcher is currently running."""
524
- return self._observer is not None and self._observer.is_alive()
525
-
526
- @property
527
- def directory(self) -> Path:
528
- """Directory being watched."""
529
- return self._directory
530
-
531
- @property
532
- def telemetry(self) -> Dict[str, Any]:
533
- """Get telemetry from the handler."""
534
- return self._handler.telemetry
535
-
536
- def __enter__(self) -> "FileWatcher":
537
- """Context manager entry - starts watching."""
538
- self.start()
539
- return self
540
-
541
- def __exit__(self, exc_type, exc_val, exc_tb) -> None:
542
- """Context manager exit - stops watching."""
543
- self.stop()
544
-
545
-
546
- def check_watchdog_available() -> bool:
547
- """Check if watchdog package is available."""
548
- return WATCHDOG_AVAILABLE
549
-
550
-
551
- class FileWatcherMixin:
552
- """
553
- Mixin providing file watching capabilities for GAIA agents.
554
-
555
- Manages multiple FileWatcher instances with automatic cleanup.
556
-
557
- Example:
558
- from gaia import Agent, FileWatcherMixin
559
-
560
- class IntakeAgent(Agent, FileWatcherMixin):
561
- def __init__(self, **kwargs):
562
- super().__init__(**kwargs)
563
-
564
- self.watch_directory(
565
- "./intake_forms",
566
- on_created=self._process_form,
567
- extensions=[".pdf", ".png"],
568
- )
569
-
570
- def _process_form(self, path: str):
571
- print(f"Processing: {path}")
572
- """
573
-
574
- _watchers: List[FileWatcher]
575
-
576
- def watch_directory(
577
- self,
578
- directory: Union[str, Path],
579
- on_created: Optional[EventCallback] = None,
580
- on_modified: Optional[EventCallback] = None,
581
- on_deleted: Optional[EventCallback] = None,
582
- on_moved: Optional[MoveCallback] = None,
583
- extensions: Optional[List[str]] = None,
584
- filter_func: Optional[FilterCallback] = None,
585
- debounce_seconds: float = 2.0,
586
- recursive: bool = False,
587
- auto_start: bool = True,
588
- ) -> FileWatcher:
589
- """
590
- Watch a directory for file changes.
591
-
592
- Args:
593
- directory: Directory path to watch.
594
- on_created: Callback for file creation.
595
- on_modified: Callback for file modification.
596
- on_deleted: Callback for file deletion.
597
- on_moved: Callback for file move/rename.
598
- extensions: File extensions to watch. None uses defaults, [] watches all.
599
- filter_func: Custom filter predicate.
600
- debounce_seconds: Debounce time between processing same file.
601
- recursive: If True, watch subdirectories recursively.
602
- auto_start: If True, start watching immediately.
603
-
604
- Returns:
605
- The FileWatcher instance.
606
-
607
- Example:
608
- self.watch_directory(
609
- "./data",
610
- on_created=self.handle_new_file,
611
- extensions=[".pdf", ".txt"],
612
- )
613
- """
614
- # Initialize watchers list if needed
615
- if not hasattr(self, "_watchers"):
616
- self._watchers = []
617
-
618
- watcher = FileWatcher(
619
- directory=directory,
620
- on_created=on_created,
621
- on_modified=on_modified,
622
- on_deleted=on_deleted,
623
- on_moved=on_moved,
624
- extensions=extensions,
625
- filter_func=filter_func,
626
- debounce_seconds=debounce_seconds,
627
- recursive=recursive,
628
- )
629
-
630
- self._watchers.append(watcher)
631
-
632
- if auto_start:
633
- watcher.start()
634
-
635
- return watcher
636
-
637
- def stop_all_watchers(self) -> None:
638
- """Stop all file watchers."""
639
- if hasattr(self, "_watchers"):
640
- for watcher in self._watchers:
641
- watcher.stop()
642
- logger.info(f"Stopped {len(self._watchers)} file watcher(s)")
643
-
644
- @property
645
- def watchers(self) -> List[FileWatcher]:
646
- """List of active file watchers."""
647
- if not hasattr(self, "_watchers"):
648
- self._watchers = []
649
- return self._watchers
650
-
651
- @property
652
- def watching_directories(self) -> List[Path]:
653
- """List of directories being watched."""
654
- return [w.directory for w in self.watchers if w.is_running]
655
-
656
- @property
657
- def watcher_telemetry(self) -> Dict[str, Any]:
658
- """Combined telemetry from all watchers."""
659
- combined = {
660
- "files_created": 0,
661
- "files_modified": 0,
662
- "files_deleted": 0,
663
- "files_moved": 0,
664
- "total_events": 0,
665
- "watcher_count": len(self.watchers),
666
- "active_count": sum(1 for w in self.watchers if w.is_running),
667
- }
668
- for watcher in self.watchers:
669
- t = watcher.telemetry
670
- combined["files_created"] += t.get("files_created", 0)
671
- combined["files_modified"] += t.get("files_modified", 0)
672
- combined["files_deleted"] += t.get("files_deleted", 0)
673
- combined["files_moved"] += t.get("files_moved", 0)
674
- combined["total_events"] += t.get("total_events", 0)
675
- return combined
1
+ # Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
2
+ # SPDX-License-Identifier: MIT
3
+
4
+ """
5
+ Generic file watching utilities for GAIA agents.
6
+
7
+ Provides FileChangeHandler and FileWatcher for monitoring directories
8
+ and responding to file system events with callbacks.
9
+
10
+ Also provides file hashing utilities for duplicate detection.
11
+
12
+ Example:
13
+ from gaia.utils import FileChangeHandler, FileWatcher, compute_file_hash
14
+
15
+ def on_new_file(path: str):
16
+ print(f"New file: {path}")
17
+ file_hash = compute_file_hash(path)
18
+ print(f"Hash: {file_hash}")
19
+
20
+ watcher = FileWatcher(
21
+ directory="./data",
22
+ on_created=on_new_file,
23
+ extensions=[".pdf", ".txt"],
24
+ )
25
+ watcher.start()
26
+ """
27
+
28
+ import hashlib
29
+ import logging
30
+ import time
31
+ from pathlib import Path
32
+ from typing import Any, Callable, Dict, List, Optional, Set, Union
33
+
34
+ try:
35
+ from watchdog.events import FileSystemEvent, FileSystemEventHandler
36
+ from watchdog.observers import Observer
37
+
38
+ WATCHDOG_AVAILABLE = True
39
+ except ImportError:
40
+ # Create dummy base class when watchdog is not available
41
+ class FileSystemEventHandler:
42
+ """Dummy base class when watchdog is not installed."""
43
+
44
+ class FileSystemEvent:
45
+ """Dummy event class when watchdog is not installed."""
46
+
47
+ src_path: str = ""
48
+ dest_path: str = ""
49
+ is_directory: bool = False
50
+
51
+ Observer = None
52
+ WATCHDOG_AVAILABLE = False
53
+
54
+ logger = logging.getLogger(__name__)
55
+
56
+ # Type alias for event callbacks
57
+ EventCallback = Callable[[str], None]
58
+ MoveCallback = Callable[[str, str], None] # (src_path, dest_path)
59
+ FilterCallback = Callable[[str], bool]
60
+
61
# Default chunk size for file hashing (64KB)
HASH_CHUNK_SIZE = 65536


def compute_file_hash(
    path: Union[str, Path],
    algorithm: str = "sha256",
    chunk_size: int = HASH_CHUNK_SIZE,
) -> Optional[str]:
    """
    Compute a hash of a file's contents.

    The file is read in fixed-size chunks so large files are never loaded
    into memory in one piece.

    Args:
        path: Path to the file to hash.
        algorithm: Hash algorithm name accepted by ``hashlib.new``
            (default: sha256).
        chunk_size: Number of bytes to read per iteration (default: 64KB).
            ``None`` or a non-positive value falls back to
            ``HASH_CHUNK_SIZE``.

    Returns:
        Hex-encoded hash string, or None if ``path`` is not an existing
        regular file, the file cannot be read, or the algorithm name is
        rejected by ``hashlib``.

    Example:
        from gaia.utils import compute_file_hash

        # Check if file was already processed
        file_hash = compute_file_hash("intake_form.pdf")
        if file_hash in processed_hashes:
            print("Already processed")
        else:
            process_file("intake_form.pdf")
            processed_hashes.add(file_hash)
    """
    # read(0) returns b"" immediately, which would end the loop before any
    # data is hashed and yield the digest of empty input; a negative size
    # would read the whole file at once. Both defeat chunked reading, so
    # use the default size instead.
    if chunk_size is None or chunk_size <= 0:
        chunk_size = HASH_CHUNK_SIZE

    try:
        file_path = Path(path)
        # is_file() is already False for a missing path, so no separate
        # exists() check is needed.
        if not file_path.is_file():
            return None

        hasher = hashlib.new(algorithm)
        with file_path.open("rb") as f:
            while chunk := f.read(chunk_size):
                hasher.update(chunk)
        return hasher.hexdigest()
    except (OSError, ValueError) as e:
        # OSError: unreadable file; ValueError: unknown algorithm name.
        logger.warning(f"Could not compute hash for {path}: {e}")
        return None
109
+
110
+
111
def compute_bytes_hash(
    data: bytes,
    algorithm: str = "sha256",
) -> str:
    """
    Hash an in-memory bytes object.

    Intended for callers that already hold the content and do not need
    to re-read it from disk.

    Args:
        data: Bytes to hash.
        algorithm: Hash algorithm name accepted by ``hashlib.new``
            (default: sha256).

    Returns:
        Hex-encoded hash string.

    Example:
        from gaia.utils import compute_bytes_hash

        with open("file.pdf", "rb") as f:
            content = f.read()
        file_hash = compute_bytes_hash(content)
    """
    # hashlib.new accepts the initial data directly, so no separate
    # update() call is required.
    return hashlib.new(algorithm, data).hexdigest()
137
+
138
+
139
class FileChangeHandler(FileSystemEventHandler):
    """
    Generic handler for file system events.

    A flexible, callback-based file system event handler that can be used
    with any agent or application. Supports:
    - Callbacks for created, modified, deleted, and moved events
    - File extension filtering
    - Custom filter predicates
    - Debouncing of created/modified events (one shared window per path)
    - Telemetry tracking (counts callbacks that completed without raising)

    Exceptions raised by callbacks are logged and swallowed so that one
    failing callback does not propagate out of the handler.

    Example:
        from gaia.utils import FileChangeHandler
        from watchdog.observers import Observer

        def handle_new_file(path: str):
            print(f"Processing: {path}")

        handler = FileChangeHandler(
            on_created=handle_new_file,
            extensions=[".pdf", ".png", ".jpg"],
            debounce_seconds=2.0,
        )

        observer = Observer()
        observer.schedule(handler, "./intake_forms", recursive=False)
        observer.start()
    """

    # Default extensions for document processing
    DEFAULT_EXTENSIONS: List[str] = [
        ".pdf",
        ".txt",
        ".md",
        ".markdown",
        ".csv",
        ".json",
        ".py",
        ".js",
        ".ts",
        ".java",
        ".cpp",
        ".c",
        ".html",
        ".css",
        ".yaml",
        ".yml",
        ".xml",
        ".rst",
        ".log",
    ]

    def __init__(
        self,
        on_created: Optional[EventCallback] = None,
        on_modified: Optional[EventCallback] = None,
        on_deleted: Optional[EventCallback] = None,
        on_moved: Optional[MoveCallback] = None,
        extensions: Optional[List[str]] = None,
        filter_func: Optional[FilterCallback] = None,
        debounce_seconds: float = 2.0,
        ignore_directories: bool = True,
    ):
        """
        Initialize FileChangeHandler.

        Args:
            on_created: Callback for file creation. Receives file path.
            on_modified: Callback for file modification. Receives file path.
            on_deleted: Callback for file deletion. Receives file path.
            on_moved: Callback for file move/rename. Receives (src_path, dest_path).
            extensions: List of file extensions to watch (e.g., [".pdf", ".txt"]).
                       Matching is case-insensitive and a missing leading
                       dot is added. A single string is treated as a
                       one-element list.
                       If None, uses DEFAULT_EXTENSIONS.
                       If empty list [], watches all files.
            filter_func: Custom filter function. If provided, called with file path
                        and should return True to process the event.
                        Takes precedence over extensions filter.
            debounce_seconds: Minimum time between processing same file.
                        Zero or a negative value disables debouncing.
            ignore_directories: If True, ignores directory events.

        Example:
            # Watch only PDFs and images
            handler = FileChangeHandler(
                on_created=process_file,
                extensions=[".pdf", ".png", ".jpg"],
            )

            # Watch all files with custom filter
            handler = FileChangeHandler(
                on_created=process_file,
                extensions=[],  # Watch all
                # The filter receives the full path, so test the file name
                filter_func=lambda p: not Path(p).name.startswith("."),
            )
        """
        super().__init__()
        self._on_created = on_created
        self._on_modified = on_modified
        self._on_deleted = on_deleted
        self._on_moved = on_moved

        # Set up extensions filter
        if extensions is None:
            self._extensions: Set[str] = set(self.DEFAULT_EXTENSIONS)
        else:
            # A bare string would otherwise be iterated character by
            # character and produce a meaningless filter.
            if isinstance(extensions, str):
                extensions = [extensions]
            # Normalize extensions to lowercase with leading dot
            self._extensions = {
                ext.lower() if ext.startswith(".") else f".{ext.lower()}"
                for ext in extensions
            }

        self._filter_func = filter_func
        self._debounce_seconds = debounce_seconds
        self._ignore_directories = ignore_directories

        # Debounce tracking: path -> time.time() of the last accepted event
        self._last_processed: Dict[str, float] = {}
        self._max_cache_size = 1000

        # Telemetry
        self._telemetry: Dict[str, Any] = self._empty_telemetry()

    @staticmethod
    def _empty_telemetry() -> Dict[str, Any]:
        """Return a fresh telemetry dict with all counters at zero."""
        return {
            "files_created": 0,
            "files_modified": 0,
            "files_deleted": 0,
            "files_moved": 0,
            "total_events": 0,
            "last_event_time": None,
        }

    def _should_process(self, file_path: str) -> bool:
        """Check if file should be processed based on filters."""
        # Custom filter takes precedence
        if self._filter_func is not None:
            return self._filter_func(file_path)

        # Empty extensions set means watch all files
        if not self._extensions:
            return True

        # str.endswith accepts a tuple of candidate suffixes
        return file_path.lower().endswith(tuple(self._extensions))

    def _is_debounced(self, file_path: str) -> bool:
        """
        Check if file was recently processed (within debounce window).

        Not a pure query: when the event is accepted (False is returned)
        the path's timestamp is recorded as a side effect.
        """
        # With a non-positive window nothing should be suppressed. The
        # "<=" comparison below would still drop events that share a
        # timestamp, so bypass the cache entirely.
        if self._debounce_seconds <= 0:
            return False

        current_time = time.time()
        last_time = self._last_processed.get(file_path, 0)

        if current_time - last_time <= self._debounce_seconds:
            return True

        # Update last processed time
        self._last_processed[file_path] = current_time

        # Evict the oldest tenth of the entries once the cache exceeds its
        # size limit, to keep memory bounded
        if len(self._last_processed) > self._max_cache_size:
            num_to_remove = self._max_cache_size // 10
            oldest_first = sorted(
                self._last_processed, key=self._last_processed.__getitem__
            )
            for stale_path in oldest_first[:num_to_remove]:
                del self._last_processed[stale_path]
            logger.debug(f"Cleaned up {num_to_remove} old entries from debounce cache")

        return False

    def _update_telemetry(self, event_type: str) -> None:
        """Update telemetry statistics."""
        self._telemetry[event_type] += 1
        self._telemetry["total_events"] += 1
        self._telemetry["last_event_time"] = time.time()

        # Log telemetry periodically
        if self._telemetry["total_events"] % 10 == 0:
            logger.debug(
                f"File Watch Telemetry: "
                f"Created: {self._telemetry['files_created']}, "
                f"Modified: {self._telemetry['files_modified']}, "
                f"Deleted: {self._telemetry['files_deleted']}, "
                f"Moved: {self._telemetry['files_moved']}, "
                f"Total: {self._telemetry['total_events']}"
            )

    def _run_callback(
        self,
        label: str,
        counter: str,
        callback: Callable[..., None],
        *paths: str,
    ) -> None:
        """Invoke a user callback; count it on success, log (not raise) on failure."""
        try:
            callback(*paths)
            self._update_telemetry(counter)
        except Exception as e:
            logger.error(f"Error in {label} callback for {paths[0]}: {e}")

    def on_created(self, event: FileSystemEvent) -> None:
        """Handle file creation."""
        if self._ignore_directories and event.is_directory:
            return

        path = event.src_path
        if not self._on_created or not self._should_process(path):
            return
        if self._is_debounced(path):
            return

        logger.debug(f"File created: {path}")
        self._run_callback("on_created", "files_created", self._on_created, path)

    def on_modified(self, event: FileSystemEvent) -> None:
        """Handle file modification."""
        if self._ignore_directories and event.is_directory:
            return

        path = event.src_path
        if not self._on_modified or not self._should_process(path):
            return
        if self._is_debounced(path):
            return

        logger.debug(f"File modified: {path}")
        self._run_callback("on_modified", "files_modified", self._on_modified, path)

    def on_deleted(self, event: FileSystemEvent) -> None:
        """Handle file deletion (never debounced)."""
        if self._ignore_directories and event.is_directory:
            return

        path = event.src_path
        if not self._on_deleted or not self._should_process(path):
            return

        logger.debug(f"File deleted: {path}")
        self._run_callback("on_deleted", "files_deleted", self._on_deleted, path)
        # The file is gone whether or not the callback succeeded, so drop
        # its debounce entry either way.
        self._last_processed.pop(path, None)

    def on_moved(self, event: FileSystemEvent) -> None:
        """Handle file move/rename (never debounced)."""
        if self._ignore_directories and event.is_directory:
            return

        src_path = event.src_path
        dest_path = getattr(event, "dest_path", None)

        if not self._on_moved or not dest_path:
            return

        # Process if either source or destination matches filter
        if not (self._should_process(src_path) or self._should_process(dest_path)):
            return

        logger.debug(f"File moved: {src_path} -> {dest_path}")
        self._run_callback(
            "on_moved", "files_moved", self._on_moved, src_path, dest_path
        )
        # The old path no longer exists; forget its debounce entry even if
        # the callback failed.
        self._last_processed.pop(src_path, None)

    @property
    def telemetry(self) -> Dict[str, Any]:
        """Get a copy of the current telemetry statistics."""
        return self._telemetry.copy()

    def reset_telemetry(self) -> None:
        """Reset telemetry counters."""
        self._telemetry = self._empty_telemetry()
402
+
403
+
404
class FileWatcher:
    """
    Convenience wrapper for watching a directory with FileChangeHandler.

    Combines Observer and FileChangeHandler for easy directory watching.
    Handles start/stop lifecycle and provides a clean API. A stopped
    watcher can be started again; each start creates a new Observer.

    Example:
        from gaia.utils import FileWatcher

        def process_intake(path: str):
            print(f"Processing intake form: {path}")

        watcher = FileWatcher(
            directory="./intake_forms",
            on_created=process_intake,
            extensions=[".pdf", ".png", ".jpg"],
        )

        watcher.start()
        # ... do work ...
        watcher.stop()

        # Or use as context manager:
        with FileWatcher("./data", on_created=process) as watcher:
            # watcher is running
            pass
        # watcher is stopped
    """

    def __init__(
        self,
        directory: Union[str, Path],
        on_created: Optional[EventCallback] = None,
        on_modified: Optional[EventCallback] = None,
        on_deleted: Optional[EventCallback] = None,
        on_moved: Optional[MoveCallback] = None,
        extensions: Optional[List[str]] = None,
        filter_func: Optional[FilterCallback] = None,
        debounce_seconds: float = 2.0,
        recursive: bool = False,
    ):
        """
        Initialize FileWatcher.

        Args:
            directory: Directory path to watch.
            on_created: Callback for file creation.
            on_modified: Callback for file modification.
            on_deleted: Callback for file deletion.
            on_moved: Callback for file move/rename.
            extensions: File extensions to watch. None uses defaults, [] watches all.
            filter_func: Custom filter predicate.
            debounce_seconds: Debounce time between processing same file.
            recursive: If True, watch subdirectories recursively.

        Raises:
            ImportError: If watchdog package is not installed.
            FileNotFoundError: If directory does not exist.
        """
        if not WATCHDOG_AVAILABLE:
            raise ImportError(
                "FileWatcher requires the 'watchdog' package.\n"
                "Install with: pip install 'watchdog>=2.1.0'\n"
                "Or: uv pip install -e '.[dev]'"
            )

        self._directory = Path(directory)
        if not self._directory.exists():
            raise FileNotFoundError(f"Directory does not exist: {directory}")

        self._recursive = recursive
        # None while stopped; holds the live Observer between start() and stop()
        self._observer: Optional[Observer] = None

        self._handler = FileChangeHandler(
            on_created=on_created,
            on_modified=on_modified,
            on_deleted=on_deleted,
            on_moved=on_moved,
            extensions=extensions,
            filter_func=filter_func,
            debounce_seconds=debounce_seconds,
        )

    def start(self) -> None:
        """
        Start watching the directory.

        Safe to call multiple times - will not start multiple observers.
        If scheduling or starting the observer raises, the watcher stays
        in the stopped state so start() can be retried.
        """
        if self._observer is not None:
            logger.warning("FileWatcher already running")
            return

        # Build and start the observer locally, and publish it only once it
        # is running. Otherwise a failure in schedule()/start() would leave
        # a dead observer behind and every retry would be rejected as
        # "already running".
        observer = Observer()
        observer.schedule(
            self._handler,
            str(self._directory),
            recursive=self._recursive,
        )
        observer.start()
        self._observer = observer
        logger.info(
            f"Started watching: {self._directory} (recursive={self._recursive})"
        )

    def stop(self) -> None:
        """
        Stop watching the directory.

        Safe to call multiple times. Waits up to 5 seconds for the
        observer thread to finish.
        """
        # Clear the reference first so the watcher is restartable even if
        # stopping or joining the observer raises.
        observer, self._observer = self._observer, None
        if observer is None:
            return

        observer.stop()
        observer.join(timeout=5.0)
        logger.info(f"Stopped watching: {self._directory}")

    @property
    def is_running(self) -> bool:
        """True if watcher is currently running."""
        return self._observer is not None and self._observer.is_alive()

    @property
    def directory(self) -> Path:
        """Directory being watched."""
        return self._directory

    @property
    def telemetry(self) -> Dict[str, Any]:
        """Get telemetry from the handler."""
        return self._handler.telemetry

    def __enter__(self) -> "FileWatcher":
        """Context manager entry - starts watching."""
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Context manager exit - stops watching."""
        self.stop()
544
+
545
+
546
def check_watchdog_available() -> bool:
    """Report whether the optional ``watchdog`` import succeeded at module load."""
    watchdog_present: bool = WATCHDOG_AVAILABLE
    return watchdog_present
549
+
550
+
551
class FileWatcherMixin:
    """
    Mixin that lets a GAIA agent own and manage several FileWatcher objects.

    Watchers are kept in a lazily created ``_watchers`` list, so the mixin
    works without its own ``__init__``.

    Example:
        from gaia import Agent, FileWatcherMixin

        class IntakeAgent(Agent, FileWatcherMixin):
            def __init__(self, **kwargs):
                super().__init__(**kwargs)

                self.watch_directory(
                    "./intake_forms",
                    on_created=self._process_form,
                    extensions=[".pdf", ".png"],
                )

            def _process_form(self, path: str):
                print(f"Processing: {path}")
    """

    _watchers: List[FileWatcher]

    def watch_directory(
        self,
        directory: Union[str, Path],
        on_created: Optional[EventCallback] = None,
        on_modified: Optional[EventCallback] = None,
        on_deleted: Optional[EventCallback] = None,
        on_moved: Optional[MoveCallback] = None,
        extensions: Optional[List[str]] = None,
        filter_func: Optional[FilterCallback] = None,
        debounce_seconds: float = 2.0,
        recursive: bool = False,
        auto_start: bool = True,
    ) -> FileWatcher:
        """
        Create a FileWatcher for ``directory`` and register it on this object.

        Args:
            directory: Directory path to watch.
            on_created: Callback for file creation.
            on_modified: Callback for file modification.
            on_deleted: Callback for file deletion.
            on_moved: Callback for file move/rename.
            extensions: File extensions to watch. None uses defaults, [] watches all.
            filter_func: Custom filter predicate.
            debounce_seconds: Debounce time between processing same file.
            recursive: If True, watch subdirectories recursively.
            auto_start: If True, start watching immediately.

        Returns:
            The FileWatcher instance.

        Example:
            self.watch_directory(
                "./data",
                on_created=self.handle_new_file,
                extensions=[".pdf", ".txt"],
            )
        """
        # Make sure the registry exists before the watcher is constructed
        if not hasattr(self, "_watchers"):
            self._watchers = []
        registry = self._watchers

        new_watcher = FileWatcher(
            directory=directory,
            on_created=on_created,
            on_modified=on_modified,
            on_deleted=on_deleted,
            on_moved=on_moved,
            extensions=extensions,
            filter_func=filter_func,
            debounce_seconds=debounce_seconds,
            recursive=recursive,
        )
        registry.append(new_watcher)

        if auto_start:
            new_watcher.start()
        return new_watcher

    def stop_all_watchers(self) -> None:
        """Stop every registered watcher; the watchers stay registered."""
        if not hasattr(self, "_watchers"):
            return

        for registered in self._watchers:
            registered.stop()
        logger.info(f"Stopped {len(self._watchers)} file watcher(s)")

    @property
    def watchers(self) -> List[FileWatcher]:
        """All registered file watchers, running or not (list created on first access)."""
        if not hasattr(self, "_watchers"):
            self._watchers = []
        return self._watchers

    @property
    def watching_directories(self) -> List[Path]:
        """Directories of the watchers that are currently running."""
        return [entry.directory for entry in self.watchers if entry.is_running]

    @property
    def watcher_telemetry(self) -> Dict[str, Any]:
        """Event counters summed over all watchers, plus total and running watcher counts."""
        fleet = self.watchers
        counter_keys = (
            "files_created",
            "files_modified",
            "files_deleted",
            "files_moved",
            "total_events",
        )

        totals: Dict[str, Any] = dict.fromkeys(counter_keys, 0)
        totals["watcher_count"] = len(fleet)
        totals["active_count"] = len([entry for entry in fleet if entry.is_running])

        for entry in fleet:
            snapshot = entry.telemetry
            for key in counter_keys:
                totals[key] += snapshot.get(key, 0)
        return totals