monoco-toolkit 0.3.9-py3-none-any.whl → 0.3.11-py3-none-any.whl
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.
- monoco/__main__.py +8 -0
- monoco/core/artifacts/__init__.py +16 -0
- monoco/core/artifacts/manager.py +575 -0
- monoco/core/artifacts/models.py +161 -0
- monoco/core/config.py +38 -4
- monoco/core/git.py +23 -0
- monoco/core/hooks/builtin/git_cleanup.py +1 -1
- monoco/core/ingestion/__init__.py +20 -0
- monoco/core/ingestion/discovery.py +248 -0
- monoco/core/ingestion/watcher.py +343 -0
- monoco/core/ingestion/worker.py +436 -0
- monoco/core/injection.py +63 -29
- monoco/core/integrations.py +2 -2
- monoco/core/loader.py +633 -0
- monoco/core/output.py +5 -5
- monoco/core/registry.py +34 -19
- monoco/core/resource/__init__.py +5 -0
- monoco/core/resource/finder.py +98 -0
- monoco/core/resource/manager.py +91 -0
- monoco/core/resource/models.py +35 -0
- monoco/core/skill_framework.py +292 -0
- monoco/core/skills.py +524 -385
- monoco/core/sync.py +73 -1
- monoco/core/workflow_converter.py +420 -0
- monoco/daemon/app.py +77 -1
- monoco/daemon/commands.py +10 -0
- monoco/daemon/mailroom_service.py +196 -0
- monoco/daemon/models.py +1 -0
- monoco/daemon/scheduler.py +236 -0
- monoco/daemon/services.py +185 -0
- monoco/daemon/triggers.py +55 -0
- monoco/features/agent/__init__.py +2 -2
- monoco/features/agent/adapter.py +41 -0
- monoco/features/agent/apoptosis.py +44 -0
- monoco/features/agent/cli.py +101 -144
- monoco/features/agent/config.py +35 -21
- monoco/features/agent/defaults.py +6 -49
- monoco/features/agent/engines.py +32 -6
- monoco/features/agent/manager.py +47 -6
- monoco/features/agent/models.py +2 -2
- monoco/features/agent/resources/atoms/atom-code-dev.yaml +61 -0
- monoco/features/agent/resources/atoms/atom-issue-lifecycle.yaml +73 -0
- monoco/features/agent/resources/atoms/atom-knowledge.yaml +55 -0
- monoco/features/agent/resources/atoms/atom-review.yaml +60 -0
- monoco/{core/resources/en → features/agent/resources/en/skills/monoco_atom_core}/SKILL.md +3 -1
- monoco/features/agent/resources/en/skills/monoco_workflow_agent_engineer/SKILL.md +94 -0
- monoco/features/agent/resources/en/skills/monoco_workflow_agent_manager/SKILL.md +93 -0
- monoco/features/agent/resources/en/skills/monoco_workflow_agent_planner/SKILL.md +85 -0
- monoco/features/agent/resources/en/skills/monoco_workflow_agent_reviewer/SKILL.md +114 -0
- monoco/features/agent/resources/workflows/workflow-dev.yaml +83 -0
- monoco/features/agent/resources/workflows/workflow-issue-create.yaml +72 -0
- monoco/features/agent/resources/workflows/workflow-review.yaml +94 -0
- monoco/features/agent/resources/zh/roles/monoco_role_engineer.yaml +49 -0
- monoco/features/agent/resources/zh/roles/monoco_role_manager.yaml +46 -0
- monoco/features/agent/resources/zh/roles/monoco_role_planner.yaml +46 -0
- monoco/features/agent/resources/zh/roles/monoco_role_reviewer.yaml +47 -0
- monoco/{core/resources/zh → features/agent/resources/zh/skills/monoco_atom_core}/SKILL.md +3 -1
- monoco/features/agent/resources/{skills/flow_engineer → zh/skills/monoco_workflow_agent_engineer}/SKILL.md +2 -2
- monoco/features/agent/resources/{skills/flow_manager → zh/skills/monoco_workflow_agent_manager}/SKILL.md +2 -2
- monoco/features/agent/resources/zh/skills/monoco_workflow_agent_planner/SKILL.md +259 -0
- monoco/features/agent/resources/zh/skills/monoco_workflow_agent_reviewer/SKILL.md +137 -0
- monoco/features/agent/session.py +59 -11
- monoco/features/agent/worker.py +38 -2
- monoco/features/artifact/__init__.py +0 -0
- monoco/features/artifact/adapter.py +33 -0
- monoco/features/artifact/resources/zh/AGENTS.md +14 -0
- monoco/features/artifact/resources/zh/skills/monoco_atom_artifact/SKILL.md +278 -0
- monoco/features/glossary/__init__.py +0 -0
- monoco/features/glossary/adapter.py +42 -0
- monoco/features/glossary/config.py +5 -0
- monoco/features/glossary/resources/en/AGENTS.md +29 -0
- monoco/features/glossary/resources/en/skills/monoco_atom_glossary/SKILL.md +35 -0
- monoco/features/glossary/resources/zh/AGENTS.md +29 -0
- monoco/features/glossary/resources/zh/skills/monoco_atom_glossary/SKILL.md +35 -0
- monoco/features/hooks/__init__.py +11 -0
- monoco/features/hooks/adapter.py +67 -0
- monoco/features/hooks/commands.py +309 -0
- monoco/features/hooks/core.py +441 -0
- monoco/features/hooks/resources/ADDING_HOOKS.md +234 -0
- monoco/features/i18n/adapter.py +18 -5
- monoco/features/i18n/core.py +482 -17
- monoco/features/i18n/resources/en/{SKILL.md → skills/monoco_atom_i18n/SKILL.md} +3 -1
- monoco/features/i18n/resources/en/skills/monoco_workflow_i18n_scan/SKILL.md +105 -0
- monoco/features/i18n/resources/zh/{SKILL.md → skills/monoco_atom_i18n/SKILL.md} +3 -1
- monoco/features/i18n/resources/{skills/i18n_scan_workflow → zh/skills/monoco_workflow_i18n_scan}/SKILL.md +2 -2
- monoco/features/issue/adapter.py +19 -6
- monoco/features/issue/commands.py +281 -7
- monoco/features/issue/core.py +272 -19
- monoco/features/issue/engine/machine.py +118 -5
- monoco/features/issue/linter.py +60 -5
- monoco/features/issue/models.py +3 -2
- monoco/features/issue/resources/en/AGENTS.md +109 -0
- monoco/features/issue/resources/en/{SKILL.md → skills/monoco_atom_issue/SKILL.md} +3 -1
- monoco/features/issue/resources/en/skills/monoco_workflow_issue_creation/SKILL.md +167 -0
- monoco/features/issue/resources/en/skills/monoco_workflow_issue_development/SKILL.md +224 -0
- monoco/features/issue/resources/en/skills/monoco_workflow_issue_management/SKILL.md +159 -0
- monoco/features/issue/resources/en/skills/monoco_workflow_issue_refinement/SKILL.md +203 -0
- monoco/features/issue/resources/hooks/post-checkout.sh +39 -0
- monoco/features/issue/resources/hooks/pre-commit.sh +41 -0
- monoco/features/issue/resources/hooks/pre-push.sh +35 -0
- monoco/features/issue/resources/zh/AGENTS.md +109 -0
- monoco/features/issue/resources/zh/{SKILL.md → skills/monoco_atom_issue_lifecycle/SKILL.md} +3 -1
- monoco/features/issue/resources/zh/skills/monoco_workflow_issue_creation/SKILL.md +167 -0
- monoco/features/issue/resources/zh/skills/monoco_workflow_issue_development/SKILL.md +224 -0
- monoco/features/issue/resources/{skills/issue_lifecycle_workflow → zh/skills/monoco_workflow_issue_management}/SKILL.md +2 -2
- monoco/features/issue/resources/zh/skills/monoco_workflow_issue_refinement/SKILL.md +203 -0
- monoco/features/issue/validator.py +101 -1
- monoco/features/memo/adapter.py +21 -8
- monoco/features/memo/cli.py +103 -10
- monoco/features/memo/core.py +178 -92
- monoco/features/memo/models.py +53 -0
- monoco/features/memo/resources/en/skills/monoco_atom_memo/SKILL.md +77 -0
- monoco/features/memo/resources/en/skills/monoco_workflow_note_processing/SKILL.md +140 -0
- monoco/features/memo/resources/zh/{SKILL.md → skills/monoco_atom_memo/SKILL.md} +3 -1
- monoco/features/memo/resources/{skills/note_processing_workflow → zh/skills/monoco_workflow_note_processing}/SKILL.md +2 -2
- monoco/features/spike/adapter.py +18 -5
- monoco/features/spike/resources/en/{SKILL.md → skills/monoco_atom_spike/SKILL.md} +3 -1
- monoco/features/spike/resources/en/skills/monoco_workflow_research/SKILL.md +121 -0
- monoco/features/spike/resources/zh/{SKILL.md → skills/monoco_atom_spike/SKILL.md} +3 -1
- monoco/features/spike/resources/{skills/research_workflow → zh/skills/monoco_workflow_research}/SKILL.md +2 -2
- monoco/main.py +38 -1
- monoco_toolkit-0.3.11.dist-info/METADATA +130 -0
- monoco_toolkit-0.3.11.dist-info/RECORD +181 -0
- monoco/features/agent/reliability.py +0 -106
- monoco/features/agent/resources/skills/flow_reviewer/SKILL.md +0 -114
- monoco_toolkit-0.3.9.dist-info/METADATA +0 -127
- monoco_toolkit-0.3.9.dist-info/RECORD +0 -115
- /monoco/{core → features/agent}/resources/en/AGENTS.md +0 -0
- /monoco/{core → features/agent}/resources/zh/AGENTS.md +0 -0
- {monoco_toolkit-0.3.9.dist-info → monoco_toolkit-0.3.11.dist-info}/WHEEL +0 -0
- {monoco_toolkit-0.3.9.dist-info → monoco_toolkit-0.3.11.dist-info}/entry_points.txt +0 -0
- {monoco_toolkit-0.3.9.dist-info → monoco_toolkit-0.3.11.dist-info}/licenses/LICENSE +0 -0
--- /dev/null
+++ b/monoco/core/ingestion/watcher.py
@@ -0,0 +1,343 @@
+"""
+Dropzone Watcher for Monoco Mailroom.
+
+Monitors dropzone directories for new files and triggers
+automated ingestion workflows.
+"""
+
+from __future__ import annotations
+
+import asyncio
+import logging
+import uuid
+from dataclasses import dataclass, field
+from datetime import datetime, timezone
+from enum import Enum
+from pathlib import Path
+from typing import Optional, Callable, Any, Set
+
+from watchdog.observers import Observer
+from watchdog.events import FileSystemEventHandler, FileCreatedEvent, FileMovedEvent
+
+from .worker import ConversionWorker, ConversionTask, ConversionResult, ConversionStatus
+from ..artifacts.manager import ArtifactManager
+from ..artifacts.models import ArtifactSourceType
+
+logger = logging.getLogger(__name__)
+
+
+class IngestionEventType(str, Enum):
+    """Types of ingestion events."""
+    FILE_DETECTED = "file_detected"
+    CONVERSION_STARTED = "conversion_started"
+    CONVERSION_COMPLETED = "conversion_completed"
+    CONVERSION_FAILED = "conversion_failed"
+    ARTIFACT_REGISTERED = "artifact_registered"
+
+
+@dataclass
+class IngestionEvent:
+    """Event emitted during the ingestion process."""
+    event_type: IngestionEventType
+    file_path: Path
+    task_id: Optional[str] = None
+    artifact_id: Optional[str] = None
+    error_message: Optional[str] = None
+    metadata: dict[str, Any] = field(default_factory=dict)
+    timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
+
+
+class DropzoneHandler(FileSystemEventHandler):
+    """File system event handler for dropzone monitoring."""
+
+    def __init__(
+        self,
+        dropzone_path: Path,
+        on_file_detected: Callable[[Path], None],
+        supported_extensions: Optional[Set[str]] = None,
+    ):
+        self.dropzone_path = Path(dropzone_path)
+        self.on_file_detected = on_file_detected
+        self.supported_extensions = supported_extensions or {
+            ".docx", ".doc", ".pdf", ".odt",
+            ".xlsx", ".xls", ".pptx", ".ppt",
+        }
+        self._processed_files: Set[Path] = set()
+
+    def on_created(self, event):
+        """Handle file creation events."""
+        if event.is_directory:
+            return
+
+        file_path = Path(event.src_path)
+        if self._should_process(file_path):
+            self._processed_files.add(file_path.resolve())
+            self.on_file_detected(file_path)
+
+    def on_moved(self, event):
+        """Handle file move events (e.g., atomic writes)."""
+        if event.is_directory:
+            return
+
+        file_path = Path(event.dest_path)
+        if self._should_process(file_path):
+            self._processed_files.add(file_path.resolve())
+            self.on_file_detected(file_path)
+
+    def _should_process(self, file_path: Path) -> bool:
+        """Check if a file should be processed."""
+        # Skip hidden files
+        if file_path.name.startswith("."):
+            return False
+
+        # Skip temporary files
+        if file_path.suffix in (".tmp", ".temp", ".part"):
+            return False
+
+        # Check extension
+        if file_path.suffix.lower() not in self.supported_extensions:
+            return False
+
+        # Skip already processed
+        if file_path.resolve() in self._processed_files:
+            return False
+
+        return True
+
+
+class DropzoneWatcher:
+    """
+    Watches dropzone directories and orchestrates automated ingestion.
+
+    Features:
+    - Real-time file system monitoring
+    - Automatic conversion using ConversionWorker
+    - Artifact registration with ArtifactManager
+    - Event callbacks for integration
+    """
+
+    def __init__(
+        self,
+        dropzone_path: Path,
+        artifact_manager: ArtifactManager,
+        conversion_worker: Optional[ConversionWorker] = None,
+        output_dir: Optional[Path] = None,
+        process_existing: bool = False,
+    ):
+        """
+        Initialize the dropzone watcher.
+
+        Args:
+            dropzone_path: Directory to monitor for new files
+            artifact_manager: ArtifactManager for registering converted files
+            conversion_worker: ConversionWorker for document conversion
+            output_dir: Directory for converted files (default: dropzone/converted)
+            process_existing: Whether to process files already in dropzone
+        """
+        self.dropzone_path = Path(dropzone_path)
+        self.artifact_manager = artifact_manager
+        self.conversion_worker = conversion_worker or ConversionWorker()
+        self.output_dir = output_dir or (self.dropzone_path / "converted")
+        self.process_existing = process_existing
+
+        # Event callbacks
+        self._on_event: Optional[Callable[[IngestionEvent], None]] = None
+
+        # State
+        self._observer: Optional[Observer] = None
+        self._running = False
+        self._pending_tasks: dict[str, asyncio.Task] = {}
+
+    def set_event_callback(self, callback: Callable[[IngestionEvent], None]) -> None:
+        """Set callback for ingestion events."""
+        self._on_event = callback
+
+    def _emit_event(self, event: IngestionEvent) -> None:
+        """Emit an ingestion event."""
+        if self._on_event:
+            try:
+                self._on_event(event)
+            except Exception:
+                pass
+
+    def start(self) -> None:
+        """Start watching the dropzone directory."""
+        if self._running:
+            return
+
+        # Ensure directories exist
+        self.dropzone_path.mkdir(parents=True, exist_ok=True)
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+
+        # Set up file system observer
+        self._handler = DropzoneHandler(
+            self.dropzone_path,
+            self._on_file_detected,
+            set(self.conversion_worker.get_supported_extensions()),
+        )
+
+        self._observer = Observer()
+        self._observer.schedule(self._handler, str(self.dropzone_path), recursive=False)
+        self._observer.start()
+
+        self._running = True
+        logger.info(f"Started watching dropzone: {self.dropzone_path}")
+
+        # Process existing files if requested
+        if self.process_existing:
+            self._scan_existing_files()
+
+    def stop(self) -> None:
+        """Stop watching the dropzone directory."""
+        if not self._running:
+            return
+
+        self._running = False
+
+        # Cancel pending tasks
+        for task in self._pending_tasks.values():
+            task.cancel()
+        self._pending_tasks.clear()
+
+        # Stop observer
+        if self._observer:
+            self._observer.stop()
+            self._observer.join()
+            self._observer = None
+
+        logger.info(f"Stopped watching dropzone: {self.dropzone_path}")
+
+    def _scan_existing_files(self) -> None:
+        """Scan and process existing files in dropzone."""
+        for file_path in self.dropzone_path.iterdir():
+            if file_path.is_file() and self._handler._should_process(file_path):
+                self._on_file_detected(file_path)
+
+    def _on_file_detected(self, file_path: Path) -> None:
+        """Handle newly detected file."""
+        logger.info(f"File detected: {file_path}")
+
+        self._emit_event(IngestionEvent(
+            event_type=IngestionEventType.FILE_DETECTED,
+            file_path=file_path,
+        ))
+
+        # Create async task for processing
+        task_id = str(uuid.uuid4())
+        asyncio.create_task(self._process_file(file_path, task_id))
+
+    async def _process_file(self, file_path: Path, task_id: str) -> None:
+        """Process a detected file through the ingestion pipeline."""
+        try:
+            # Step 1: Check if conversion is needed/possible
+            if not self.conversion_worker.can_convert(file_path):
+                logger.warning(f"Cannot convert file: {file_path}")
+                self._emit_event(IngestionEvent(
+                    event_type=IngestionEventType.CONVERSION_FAILED,
+                    file_path=file_path,
+                    task_id=task_id,
+                    error_message="No conversion tool available for this file type",
+                ))
+                return
+
+            # Step 2: Create conversion task
+            conversion_task = ConversionTask(
+                task_id=task_id,
+                source_path=file_path,
+                target_format="txt",
+                output_dir=self.output_dir,
+            )
+
+            self._emit_event(IngestionEvent(
+                event_type=IngestionEventType.CONVERSION_STARTED,
+                file_path=file_path,
+                task_id=task_id,
+            ))
+
+            # Step 3: Perform conversion
+            result = await self.conversion_worker.submit(conversion_task)
+
+            if result.status != ConversionStatus.SUCCESS:
+                logger.error(f"Conversion failed for {file_path}: {result.error_message}")
+                self._emit_event(IngestionEvent(
+                    event_type=IngestionEventType.CONVERSION_FAILED,
+                    file_path=file_path,
+                    task_id=task_id,
+                    error_message=result.error_message,
+                ))
+                return
+
+            self._emit_event(IngestionEvent(
+                event_type=IngestionEventType.CONVERSION_COMPLETED,
+                file_path=file_path,
+                task_id=task_id,
+                metadata={
+                    "output_path": str(result.output_path),
+                    "processing_time_ms": result.processing_time_ms,
+                },
+            ))
+
+            # Step 4: Register as artifact
+            if result.output_path and result.output_path.exists():
+                artifact_meta = self._register_artifact(
+                    result.output_path,
+                    source_file=file_path,
+                    conversion_metadata=result.metadata,
+                )
+
+                self._emit_event(IngestionEvent(
+                    event_type=IngestionEventType.ARTIFACT_REGISTERED,
+                    file_path=file_path,
+                    task_id=task_id,
+                    artifact_id=artifact_meta.artifact_id,
+                    metadata={
+                        "content_hash": artifact_meta.content_hash,
+                        "content_type": artifact_meta.content_type,
+                    },
+                ))
+
+                logger.info(f"Successfully ingested {file_path} as artifact {artifact_meta.artifact_id}")
+
+        except Exception as e:
+            logger.exception(f"Error processing file {file_path}")
+            self._emit_event(IngestionEvent(
+                event_type=IngestionEventType.CONVERSION_FAILED,
+                file_path=file_path,
+                task_id=task_id,
+                error_message=str(e),
+            ))
+
+    def _register_artifact(
+        self,
+        file_path: Path,
+        source_file: Path,
+        conversion_metadata: dict[str, Any],
+    ) -> Any:
+        """Register converted file as an artifact."""
+        metadata = {
+            "source_file": str(source_file),
+            "original_filename": source_file.name,
+            **conversion_metadata,
+        }
+
+        return self.artifact_manager.store_file(
+            file_path=file_path,
+            source_type=ArtifactSourceType.IMPORTED,
+            content_type="text/plain",
+            tags=["mailroom", "converted", source_file.suffix.lower().lstrip(".")],
+            metadata=metadata,
+        )
+
+    def is_running(self) -> bool:
+        """Check if the watcher is currently running."""
+        return self._running
+
+    def get_stats(self) -> dict[str, Any]:
+        """Get watcher statistics."""
+        return {
+            "running": self._running,
+            "dropzone_path": str(self.dropzone_path),
+            "output_dir": str(self.output_dir),
+            "pending_tasks": len(self._pending_tasks),
+            "supported_extensions": list(self.conversion_worker.get_supported_extensions()),
+        }