amd-gaia 0.14.3__py3-none-any.whl → 0.15.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/METADATA +223 -223
- amd_gaia-0.15.1.dist-info/RECORD +178 -0
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/entry_points.txt +1 -0
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/licenses/LICENSE.md +20 -20
- gaia/__init__.py +29 -29
- gaia/agents/__init__.py +19 -19
- gaia/agents/base/__init__.py +9 -9
- gaia/agents/base/agent.py +2177 -2177
- gaia/agents/base/api_agent.py +120 -120
- gaia/agents/base/console.py +1841 -1841
- gaia/agents/base/errors.py +237 -237
- gaia/agents/base/mcp_agent.py +86 -86
- gaia/agents/base/tools.py +83 -83
- gaia/agents/blender/agent.py +556 -556
- gaia/agents/blender/agent_simple.py +133 -135
- gaia/agents/blender/app.py +211 -211
- gaia/agents/blender/app_simple.py +41 -41
- gaia/agents/blender/core/__init__.py +16 -16
- gaia/agents/blender/core/materials.py +506 -506
- gaia/agents/blender/core/objects.py +316 -316
- gaia/agents/blender/core/rendering.py +225 -225
- gaia/agents/blender/core/scene.py +220 -220
- gaia/agents/blender/core/view.py +146 -146
- gaia/agents/chat/__init__.py +9 -9
- gaia/agents/chat/agent.py +835 -835
- gaia/agents/chat/app.py +1058 -1058
- gaia/agents/chat/session.py +508 -508
- gaia/agents/chat/tools/__init__.py +15 -15
- gaia/agents/chat/tools/file_tools.py +96 -96
- gaia/agents/chat/tools/rag_tools.py +1729 -1729
- gaia/agents/chat/tools/shell_tools.py +436 -436
- gaia/agents/code/__init__.py +7 -7
- gaia/agents/code/agent.py +549 -549
- gaia/agents/code/cli.py +377 -0
- gaia/agents/code/models.py +135 -135
- gaia/agents/code/orchestration/__init__.py +24 -24
- gaia/agents/code/orchestration/checklist_executor.py +1763 -1763
- gaia/agents/code/orchestration/checklist_generator.py +713 -713
- gaia/agents/code/orchestration/factories/__init__.py +9 -9
- gaia/agents/code/orchestration/factories/base.py +63 -63
- gaia/agents/code/orchestration/factories/nextjs_factory.py +118 -118
- gaia/agents/code/orchestration/factories/python_factory.py +106 -106
- gaia/agents/code/orchestration/orchestrator.py +841 -841
- gaia/agents/code/orchestration/project_analyzer.py +391 -391
- gaia/agents/code/orchestration/steps/__init__.py +67 -67
- gaia/agents/code/orchestration/steps/base.py +188 -188
- gaia/agents/code/orchestration/steps/error_handler.py +314 -314
- gaia/agents/code/orchestration/steps/nextjs.py +828 -828
- gaia/agents/code/orchestration/steps/python.py +307 -307
- gaia/agents/code/orchestration/template_catalog.py +469 -469
- gaia/agents/code/orchestration/workflows/__init__.py +14 -14
- gaia/agents/code/orchestration/workflows/base.py +80 -80
- gaia/agents/code/orchestration/workflows/nextjs.py +186 -186
- gaia/agents/code/orchestration/workflows/python.py +94 -94
- gaia/agents/code/prompts/__init__.py +11 -11
- gaia/agents/code/prompts/base_prompt.py +77 -77
- gaia/agents/code/prompts/code_patterns.py +2036 -2036
- gaia/agents/code/prompts/nextjs_prompt.py +40 -40
- gaia/agents/code/prompts/python_prompt.py +109 -109
- gaia/agents/code/schema_inference.py +365 -365
- gaia/agents/code/system_prompt.py +41 -41
- gaia/agents/code/tools/__init__.py +42 -42
- gaia/agents/code/tools/cli_tools.py +1138 -1138
- gaia/agents/code/tools/code_formatting.py +319 -319
- gaia/agents/code/tools/code_tools.py +769 -769
- gaia/agents/code/tools/error_fixing.py +1347 -1347
- gaia/agents/code/tools/external_tools.py +180 -180
- gaia/agents/code/tools/file_io.py +845 -845
- gaia/agents/code/tools/prisma_tools.py +190 -190
- gaia/agents/code/tools/project_management.py +1016 -1016
- gaia/agents/code/tools/testing.py +321 -321
- gaia/agents/code/tools/typescript_tools.py +122 -122
- gaia/agents/code/tools/validation_parsing.py +461 -461
- gaia/agents/code/tools/validation_tools.py +806 -806
- gaia/agents/code/tools/web_dev_tools.py +1758 -1758
- gaia/agents/code/validators/__init__.py +16 -16
- gaia/agents/code/validators/antipattern_checker.py +241 -241
- gaia/agents/code/validators/ast_analyzer.py +197 -197
- gaia/agents/code/validators/requirements_validator.py +145 -145
- gaia/agents/code/validators/syntax_validator.py +171 -171
- gaia/agents/docker/__init__.py +7 -7
- gaia/agents/docker/agent.py +642 -642
- gaia/agents/emr/__init__.py +8 -8
- gaia/agents/emr/agent.py +1506 -1506
- gaia/agents/emr/cli.py +1322 -1322
- gaia/agents/emr/constants.py +475 -475
- gaia/agents/emr/dashboard/__init__.py +4 -4
- gaia/agents/emr/dashboard/server.py +1974 -1974
- gaia/agents/jira/__init__.py +11 -11
- gaia/agents/jira/agent.py +894 -894
- gaia/agents/jira/jql_templates.py +299 -299
- gaia/agents/routing/__init__.py +7 -7
- gaia/agents/routing/agent.py +567 -570
- gaia/agents/routing/system_prompt.py +75 -75
- gaia/agents/summarize/__init__.py +11 -0
- gaia/agents/summarize/agent.py +885 -0
- gaia/agents/summarize/prompts.py +129 -0
- gaia/api/__init__.py +23 -23
- gaia/api/agent_registry.py +238 -238
- gaia/api/app.py +305 -305
- gaia/api/openai_server.py +575 -575
- gaia/api/schemas.py +186 -186
- gaia/api/sse_handler.py +373 -373
- gaia/apps/__init__.py +4 -4
- gaia/apps/llm/__init__.py +6 -6
- gaia/apps/llm/app.py +173 -169
- gaia/apps/summarize/app.py +116 -633
- gaia/apps/summarize/html_viewer.py +133 -133
- gaia/apps/summarize/pdf_formatter.py +284 -284
- gaia/audio/__init__.py +2 -2
- gaia/audio/audio_client.py +439 -439
- gaia/audio/audio_recorder.py +269 -269
- gaia/audio/kokoro_tts.py +599 -599
- gaia/audio/whisper_asr.py +432 -432
- gaia/chat/__init__.py +16 -16
- gaia/chat/app.py +430 -430
- gaia/chat/prompts.py +522 -522
- gaia/chat/sdk.py +1228 -1225
- gaia/cli.py +5481 -5621
- gaia/database/__init__.py +10 -10
- gaia/database/agent.py +176 -176
- gaia/database/mixin.py +290 -290
- gaia/database/testing.py +64 -64
- gaia/eval/batch_experiment.py +2332 -2332
- gaia/eval/claude.py +542 -542
- gaia/eval/config.py +37 -37
- gaia/eval/email_generator.py +512 -512
- gaia/eval/eval.py +3179 -3179
- gaia/eval/groundtruth.py +1130 -1130
- gaia/eval/transcript_generator.py +582 -582
- gaia/eval/webapp/README.md +167 -167
- gaia/eval/webapp/package-lock.json +875 -875
- gaia/eval/webapp/package.json +20 -20
- gaia/eval/webapp/public/app.js +3402 -3402
- gaia/eval/webapp/public/index.html +87 -87
- gaia/eval/webapp/public/styles.css +3661 -3661
- gaia/eval/webapp/server.js +415 -415
- gaia/eval/webapp/test-setup.js +72 -72
- gaia/llm/__init__.py +9 -2
- gaia/llm/base_client.py +60 -0
- gaia/llm/exceptions.py +12 -0
- gaia/llm/factory.py +70 -0
- gaia/llm/lemonade_client.py +3236 -3221
- gaia/llm/lemonade_manager.py +294 -294
- gaia/llm/providers/__init__.py +9 -0
- gaia/llm/providers/claude.py +108 -0
- gaia/llm/providers/lemonade.py +120 -0
- gaia/llm/providers/openai_provider.py +79 -0
- gaia/llm/vlm_client.py +382 -382
- gaia/logger.py +189 -189
- gaia/mcp/agent_mcp_server.py +245 -245
- gaia/mcp/blender_mcp_client.py +138 -138
- gaia/mcp/blender_mcp_server.py +648 -648
- gaia/mcp/context7_cache.py +332 -332
- gaia/mcp/external_services.py +518 -518
- gaia/mcp/mcp_bridge.py +811 -550
- gaia/mcp/servers/__init__.py +6 -6
- gaia/mcp/servers/docker_mcp.py +83 -83
- gaia/perf_analysis.py +361 -0
- gaia/rag/__init__.py +10 -10
- gaia/rag/app.py +293 -293
- gaia/rag/demo.py +304 -304
- gaia/rag/pdf_utils.py +235 -235
- gaia/rag/sdk.py +2194 -2194
- gaia/security.py +163 -163
- gaia/talk/app.py +289 -289
- gaia/talk/sdk.py +538 -538
- gaia/testing/__init__.py +87 -87
- gaia/testing/assertions.py +330 -330
- gaia/testing/fixtures.py +333 -333
- gaia/testing/mocks.py +493 -493
- gaia/util.py +46 -46
- gaia/utils/__init__.py +33 -33
- gaia/utils/file_watcher.py +675 -675
- gaia/utils/parsing.py +223 -223
- gaia/version.py +100 -100
- amd_gaia-0.14.3.dist-info/RECORD +0 -168
- gaia/agents/code/app.py +0 -266
- gaia/llm/llm_client.py +0 -729
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/WHEEL +0 -0
- {amd_gaia-0.14.3.dist-info → amd_gaia-0.15.1.dist-info}/top_level.txt +0 -0
gaia/utils/file_watcher.py
CHANGED
@@ -1,675 +1,675 @@

As rendered in this diff, the removed (0.14.3) and added (0.15.1) versions of the module are identical line for line (all 675 lines are marked removed and re-added), so the source is reproduced once below.

```python
# Copyright(C) 2025-2026 Advanced Micro Devices, Inc. All rights reserved.
# SPDX-License-Identifier: MIT

"""
Generic file watching utilities for GAIA agents.

Provides FileChangeHandler and FileWatcher for monitoring directories
and responding to file system events with callbacks.

Also provides file hashing utilities for duplicate detection.

Example:
    from gaia.utils import FileChangeHandler, FileWatcher, compute_file_hash

    def on_new_file(path: str):
        print(f"New file: {path}")
        file_hash = compute_file_hash(path)
        print(f"Hash: {file_hash}")

    watcher = FileWatcher(
        directory="./data",
        on_created=on_new_file,
        extensions=[".pdf", ".txt"],
    )
    watcher.start()
"""

import hashlib
import logging
import time
from pathlib import Path
from typing import Any, Callable, Dict, List, Optional, Set, Union

try:
    from watchdog.events import FileSystemEvent, FileSystemEventHandler
    from watchdog.observers import Observer

    WATCHDOG_AVAILABLE = True
except ImportError:
    # Create dummy base class when watchdog is not available
    class FileSystemEventHandler:
        """Dummy base class when watchdog is not installed."""

    class FileSystemEvent:
        """Dummy event class when watchdog is not installed."""

        src_path: str = ""
        dest_path: str = ""
        is_directory: bool = False

    Observer = None
    WATCHDOG_AVAILABLE = False

logger = logging.getLogger(__name__)

# Type alias for event callbacks
EventCallback = Callable[[str], None]
MoveCallback = Callable[[str, str], None]  # (src_path, dest_path)
FilterCallback = Callable[[str], bool]

# Default chunk size for file hashing (64KB)
HASH_CHUNK_SIZE = 65536


def compute_file_hash(
    path: Union[str, Path],
    algorithm: str = "sha256",
    chunk_size: int = HASH_CHUNK_SIZE,
) -> Optional[str]:
    """
    Compute a hash of a file's contents.

    Uses chunked reading to handle large files efficiently without
    loading the entire file into memory.

    Args:
        path: Path to the file to hash.
        algorithm: Hash algorithm to use (default: sha256).
            Supports any algorithm from hashlib.
        chunk_size: Size of chunks to read at a time (default: 64KB).

    Returns:
        Hex-encoded hash string, or None if file cannot be read.

    Example:
        from gaia.utils import compute_file_hash

        # Check if file was already processed
        file_hash = compute_file_hash("intake_form.pdf")
        if file_hash in processed_hashes:
            print("Already processed")
        else:
            process_file("intake_form.pdf")
            processed_hashes.add(file_hash)
    """
    try:
        file_path = Path(path)
        if not file_path.exists() or not file_path.is_file():
            return None

        hasher = hashlib.new(algorithm)
        with open(file_path, "rb") as f:
            while chunk := f.read(chunk_size):
                hasher.update(chunk)
        return hasher.hexdigest()
    except (OSError, IOError, ValueError) as e:
        logger.warning(f"Could not compute hash for {path}: {e}")
        return None


def compute_bytes_hash(
    data: bytes,
    algorithm: str = "sha256",
) -> str:
    """
    Compute a hash of bytes data.

    Useful when the file content is already loaded in memory.

    Args:
        data: Bytes to hash.
        algorithm: Hash algorithm to use (default: sha256).

    Returns:
        Hex-encoded hash string.

    Example:
        from gaia.utils import compute_bytes_hash

        with open("file.pdf", "rb") as f:
            content = f.read()
        file_hash = compute_bytes_hash(content)
    """
    hasher = hashlib.new(algorithm)
    hasher.update(data)
    return hasher.hexdigest()


class FileChangeHandler(FileSystemEventHandler):
    """
    Generic handler for file system events.

    A flexible, callback-based file system event handler that can be used
    with any agent or application. Supports:
    - Callbacks for created, modified, deleted, and moved events
    - File extension filtering
    - Custom filter predicates
    - Debouncing to prevent duplicate events
    - Telemetry tracking

    Example:
        from gaia.utils import FileChangeHandler
        from watchdog.observers import Observer

        def handle_new_file(path: str):
            print(f"Processing: {path}")

        handler = FileChangeHandler(
            on_created=handle_new_file,
            extensions=[".pdf", ".png", ".jpg"],
            debounce_seconds=2.0,
        )

        observer = Observer()
        observer.schedule(handler, "./intake_forms", recursive=False)
        observer.start()
    """

    # Default extensions for document processing
    DEFAULT_EXTENSIONS: List[str] = [
        ".pdf",
        ".txt",
        ".md",
        ".markdown",
        ".csv",
        ".json",
        ".py",
        ".js",
        ".ts",
        ".java",
        ".cpp",
        ".c",
        ".html",
        ".css",
        ".yaml",
        ".yml",
        ".xml",
        ".rst",
        ".log",
    ]

    def __init__(
        self,
        on_created: Optional[EventCallback] = None,
        on_modified: Optional[EventCallback] = None,
        on_deleted: Optional[EventCallback] = None,
        on_moved: Optional[MoveCallback] = None,
        extensions: Optional[List[str]] = None,
        filter_func: Optional[FilterCallback] = None,
        debounce_seconds: float = 2.0,
        ignore_directories: bool = True,
    ):
        """
        Initialize FileChangeHandler.

        Args:
            on_created: Callback for file creation. Receives file path.
            on_modified: Callback for file modification. Receives file path.
            on_deleted: Callback for file deletion. Receives file path.
            on_moved: Callback for file move/rename. Receives (src_path, dest_path).
            extensions: List of file extensions to watch (e.g., [".pdf", ".txt"]).
                If None, uses DEFAULT_EXTENSIONS.
                If empty list [], watches all files.
            filter_func: Custom filter function. If provided, called with file path
                and should return True to process the event.
                Takes precedence over extensions filter.
            debounce_seconds: Minimum time between processing same file.
            ignore_directories: If True, ignores directory events.

        Example:
            # Watch only PDFs and images
            handler = FileChangeHandler(
                on_created=process_file,
                extensions=[".pdf", ".png", ".jpg"],
            )

            # Watch all files with custom filter
            handler = FileChangeHandler(
                on_created=process_file,
                extensions=[],  # Watch all
                filter_func=lambda p: not p.startswith("."),  # Exclude hidden
            )
        """
        super().__init__()
        self._on_created = on_created
        self._on_modified = on_modified
        self._on_deleted = on_deleted
        self._on_moved = on_moved

        # Set up extensions filter
        if extensions is None:
            self._extensions: Set[str] = set(self.DEFAULT_EXTENSIONS)
        else:
            # Normalize extensions to lowercase with leading dot
            self._extensions = {
                ext.lower() if ext.startswith(".") else f".{ext.lower()}"
                for ext in extensions
            }

        self._filter_func = filter_func
        self._debounce_seconds = debounce_seconds
        self._ignore_directories = ignore_directories

        # Debounce tracking
        self._last_processed: Dict[str, float] = {}
        self._max_cache_size = 1000

        # Telemetry
        self._telemetry: Dict[str, Any] = {
            "files_created": 0,
            "files_modified": 0,
            "files_deleted": 0,
            "files_moved": 0,
            "total_events": 0,
            "last_event_time": None,
        }

    def _should_process(self, file_path: str) -> bool:
        """Check if file should be processed based on filters."""
        # Custom filter takes precedence
        if self._filter_func is not None:
            return self._filter_func(file_path)

        # Empty extensions list means watch all files
        if not self._extensions:
            return True

        # Check extension
        file_lower = file_path.lower()
        return any(file_lower.endswith(ext) for ext in self._extensions)

    def _is_debounced(self, file_path: str) -> bool:
        """Check if file was recently processed (within debounce window)."""
        current_time = time.time()
        last_time = self._last_processed.get(file_path, 0)

        if current_time - last_time <= self._debounce_seconds:
            return True

        # Update last processed time
        self._last_processed[file_path] = current_time

        # LRU cache eviction to prevent memory leaks
        if len(self._last_processed) > self._max_cache_size:
            num_to_remove = self._max_cache_size // 10
            sorted_items = sorted(self._last_processed.items(), key=lambda x: x[1])
            for path, _ in sorted_items[:num_to_remove]:
                del self._last_processed[path]
            logger.debug(f"Cleaned up {num_to_remove} old entries from debounce cache")

        return False

    def _update_telemetry(self, event_type: str) -> None:
        """Update telemetry statistics."""
        self._telemetry[event_type] += 1
        self._telemetry["total_events"] += 1
        self._telemetry["last_event_time"] = time.time()

        # Log telemetry periodically
        if self._telemetry["total_events"] % 10 == 0:
            logger.debug(
                f"File Watch Telemetry: "
                f"Created: {self._telemetry['files_created']}, "
                f"Modified: {self._telemetry['files_modified']}, "
                f"Deleted: {self._telemetry['files_deleted']}, "
                f"Moved: {self._telemetry['files_moved']}, "
                f"Total: {self._telemetry['total_events']}"
            )

    def on_created(self, event: FileSystemEvent) -> None:
        """Handle file creation."""
        if self._ignore_directories and event.is_directory:
            return

        if self._on_created and self._should_process(event.src_path):
            if not self._is_debounced(event.src_path):
                logger.debug(f"File created: {event.src_path}")
                try:
                    self._on_created(event.src_path)
                    self._update_telemetry("files_created")
                except Exception as e:
                    logger.error(
                        f"Error in on_created callback for {event.src_path}: {e}"
                    )

    def on_modified(self, event: FileSystemEvent) -> None:
        """Handle file modification."""
        if self._ignore_directories and event.is_directory:
            return

        if self._on_modified and self._should_process(event.src_path):
            if not self._is_debounced(event.src_path):
                logger.debug(f"File modified: {event.src_path}")
                try:
                    self._on_modified(event.src_path)
                    self._update_telemetry("files_modified")
                except Exception as e:
                    logger.error(
                        f"Error in on_modified callback for {event.src_path}: {e}"
                    )

    def on_deleted(self, event: FileSystemEvent) -> None:
        """Handle file deletion."""
        if self._ignore_directories and event.is_directory:
            return

        if self._on_deleted and self._should_process(event.src_path):
            logger.debug(f"File deleted: {event.src_path}")
            try:
                self._on_deleted(event.src_path)
                self._update_telemetry("files_deleted")
                # Clean up from debounce cache
                self._last_processed.pop(event.src_path, None)
            except Exception as e:
                logger.error(f"Error in on_deleted callback for {event.src_path}: {e}")

    def on_moved(self, event: FileSystemEvent) -> None:
        """Handle file move/rename."""
        if self._ignore_directories and event.is_directory:
            return

        src_path = event.src_path
        dest_path = getattr(event, "dest_path", None)

        if self._on_moved and dest_path:
            # Process if either source or destination matches filter
            if self._should_process(src_path) or self._should_process(dest_path):
                logger.debug(f"File moved: {src_path} -> {dest_path}")
                try:
                    self._on_moved(src_path, dest_path)
                    self._update_telemetry("files_moved")
                    # Update debounce cache
                    self._last_processed.pop(src_path, None)
                except Exception as e:
                    logger.error(f"Error in on_moved callback for {src_path}: {e}")

    @property
    def telemetry(self) -> Dict[str, Any]:
        """Get current telemetry statistics."""
        return self._telemetry.copy()

    def reset_telemetry(self) -> None:
        """Reset telemetry counters."""
        self._telemetry = {
            "files_created": 0,
            "files_modified": 0,
            "files_deleted": 0,
            "files_moved": 0,
            "total_events": 0,
            "last_event_time": None,
        }


class FileWatcher:
    """
    Convenience wrapper for watching a directory with FileChangeHandler.

    Combines Observer and FileChangeHandler for easy directory watching.
    Handles start/stop lifecycle and provides a clean API.

    Example:
        from gaia.utils import FileWatcher

        def process_intake(path: str):
            print(f"Processing intake form: {path}")

        watcher = FileWatcher(
            directory="./intake_forms",
            on_created=process_intake,
            extensions=[".pdf", ".png", ".jpg"],
        )

        watcher.start()
        # ... do work ...
        watcher.stop()

        # Or use as context manager:
        with FileWatcher("./data", on_created=process) as watcher:
            # watcher is running
            pass
        # watcher is stopped
    """

    def __init__(
        self,
        directory: Union[str, Path],
        on_created: Optional[EventCallback] = None,
        on_modified: Optional[EventCallback] = None,
        on_deleted: Optional[EventCallback] = None,
        on_moved: Optional[MoveCallback] = None,
        extensions: Optional[List[str]] = None,
        filter_func: Optional[FilterCallback] = None,
        debounce_seconds: float = 2.0,
        recursive: bool = False,
    ):
        """
        Initialize FileWatcher.

        Args:
            directory: Directory path to watch.
            on_created: Callback for file creation.
            on_modified: Callback for file modification.
            on_deleted: Callback for file deletion.
            on_moved: Callback for file move/rename.
            extensions: File extensions to watch. None uses defaults, [] watches all.
            filter_func: Custom filter predicate.
            debounce_seconds: Debounce time between processing same file.
            recursive: If True, watch subdirectories recursively.

        Raises:
            ImportError: If watchdog package is not installed.
            FileNotFoundError: If directory does not exist.
        """
        if not WATCHDOG_AVAILABLE:
            raise ImportError(
                "FileWatcher requires the 'watchdog' package.\n"
                "Install with: pip install 'watchdog>=2.1.0'\n"
                "Or: uv pip install -e '.[dev]'"
            )

        self._directory = Path(directory)
        if not self._directory.exists():
            raise FileNotFoundError(f"Directory does not exist: {directory}")

        self._recursive = recursive
        self._observer: Optional[Observer] = None

        self._handler = FileChangeHandler(
            on_created=on_created,
            on_modified=on_modified,
            on_deleted=on_deleted,
            on_moved=on_moved,
            extensions=extensions,
            filter_func=filter_func,
            debounce_seconds=debounce_seconds,
        )

    def start(self) -> None:
        """
        Start watching the directory.

        Safe to call multiple times - will not start multiple observers.
        """
        if self._observer is not None:
            logger.warning("FileWatcher already running")
            return

        self._observer = Observer()
        self._observer.schedule(
            self._handler,
            str(self._directory),
            recursive=self._recursive,
        )
        self._observer.start()
        logger.info(
            f"Started watching: {self._directory} " f"(recursive={self._recursive})"
        )

    def stop(self) -> None:
        """
        Stop watching the directory.

        Safe to call multiple times.
        """
        if self._observer is not None:
            self._observer.stop()
            self._observer.join(timeout=5.0)
            self._observer = None
            logger.info(f"Stopped watching: {self._directory}")

    @property
    def is_running(self) -> bool:
        """True if watcher is currently running."""
        return self._observer is not None and self._observer.is_alive()

    @property
    def directory(self) -> Path:
        """Directory being watched."""
        return self._directory

    @property
    def telemetry(self) -> Dict[str, Any]:
        """Get telemetry from the handler."""
        return self._handler.telemetry

    def __enter__(self) -> "FileWatcher":
        """Context manager entry - starts watching."""
        self.start()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb) -> None:
        """Context manager exit - stops watching."""
        self.stop()


def check_watchdog_available() -> bool:
    """Check if watchdog package is available."""
    return WATCHDOG_AVAILABLE


class FileWatcherMixin:
    """
    Mixin providing file watching capabilities for GAIA agents.

    Manages multiple FileWatcher instances with automatic cleanup.

    Example:
        from gaia import Agent, FileWatcherMixin

        class IntakeAgent(Agent, FileWatcherMixin):
            def __init__(self, **kwargs):
                super().__init__(**kwargs)

                self.watch_directory(
                    "./intake_forms",
                    on_created=self._process_form,
                    extensions=[".pdf", ".png"],
                )

            def _process_form(self, path: str):
                print(f"Processing: {path}")
    """

    _watchers: List[FileWatcher]

    def watch_directory(
        self,
        directory: Union[str, Path],
        on_created: Optional[EventCallback] = None,
        on_modified: Optional[EventCallback] = None,
        on_deleted: Optional[EventCallback] = None,
        on_moved: Optional[MoveCallback] = None,
        extensions: Optional[List[str]] = None,
        filter_func: Optional[FilterCallback] = None,
        debounce_seconds: float = 2.0,
        recursive: bool = False,
        auto_start: bool = True,
    ) -> FileWatcher:
        """
        Watch a directory for file changes.

        Args:
            directory: Directory path to watch.
            on_created: Callback for file creation.
            on_modified: Callback for file modification.
            on_deleted: Callback for file deletion.
            on_moved: Callback for file move/rename.
            extensions: File extensions to watch. None uses defaults, [] watches all.
            filter_func: Custom filter predicate.
            debounce_seconds: Debounce time between processing same file.
            recursive: If True, watch subdirectories recursively.
            auto_start: If True, start watching immediately.

        Returns:
            The FileWatcher instance.

        Example:
            self.watch_directory(
                "./data",
                on_created=self.handle_new_file,
                extensions=[".pdf", ".txt"],
            )
        """
        # Initialize watchers list if needed
        if not hasattr(self, "_watchers"):
            self._watchers = []

        watcher = FileWatcher(
            directory=directory,
            on_created=on_created,
            on_modified=on_modified,
            on_deleted=on_deleted,
            on_moved=on_moved,
            extensions=extensions,
            filter_func=filter_func,
            debounce_seconds=debounce_seconds,
            recursive=recursive,
        )

        self._watchers.append(watcher)

        if auto_start:
            watcher.start()

        return watcher

    def stop_all_watchers(self) -> None:
        """Stop all file watchers."""
        if hasattr(self, "_watchers"):
            for watcher in self._watchers:
                watcher.stop()
            logger.info(f"Stopped {len(self._watchers)} file watcher(s)")

    @property
    def watchers(self) -> List[FileWatcher]:
        """List of active file watchers."""
        if not hasattr(self, "_watchers"):
            self._watchers = []
        return self._watchers

    @property
    def watching_directories(self) -> List[Path]:
        """List of directories being watched."""
        return [w.directory for w in self.watchers if w.is_running]

    @property
    def watcher_telemetry(self) -> Dict[str, Any]:
        """Combined telemetry from all watchers."""
        combined = {
            "files_created": 0,
            "files_modified": 0,
            "files_deleted": 0,
            "files_moved": 0,
            "total_events": 0,
            "watcher_count": len(self.watchers),
            "active_count": sum(1 for w in self.watchers if w.is_running),
        }
        for watcher in self.watchers:
            t = watcher.telemetry
            combined["files_created"] += t.get("files_created", 0)
            combined["files_modified"] += t.get("files_modified", 0)
            combined["files_deleted"] += t.get("files_deleted", 0)
            combined["files_moved"] += t.get("files_moved", 0)
            combined["total_events"] += t.get("total_events", 0)
        return combined
```