monoco-toolkit 0.3.10__py3-none-any.whl → 0.3.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- monoco/__main__.py +8 -0
- monoco/core/artifacts/__init__.py +16 -0
- monoco/core/artifacts/manager.py +575 -0
- monoco/core/artifacts/models.py +161 -0
- monoco/core/config.py +31 -4
- monoco/core/git.py +23 -0
- monoco/core/ingestion/__init__.py +20 -0
- monoco/core/ingestion/discovery.py +248 -0
- monoco/core/ingestion/watcher.py +343 -0
- monoco/core/ingestion/worker.py +436 -0
- monoco/core/loader.py +633 -0
- monoco/core/registry.py +34 -25
- monoco/core/skills.py +119 -80
- monoco/daemon/app.py +77 -1
- monoco/daemon/commands.py +10 -0
- monoco/daemon/mailroom_service.py +196 -0
- monoco/daemon/models.py +1 -0
- monoco/daemon/scheduler.py +236 -0
- monoco/daemon/services.py +185 -0
- monoco/daemon/triggers.py +55 -0
- monoco/features/agent/adapter.py +17 -7
- monoco/features/agent/apoptosis.py +4 -4
- monoco/features/agent/manager.py +41 -5
- monoco/{core/resources/en/skills/monoco_core → features/agent/resources/en/skills/monoco_atom_core}/SKILL.md +2 -2
- monoco/features/agent/resources/en/skills/{flow_engineer → monoco_workflow_agent_engineer}/SKILL.md +2 -2
- monoco/features/agent/resources/en/skills/{flow_manager → monoco_workflow_agent_manager}/SKILL.md +2 -2
- monoco/features/agent/resources/en/skills/{flow_planner → monoco_workflow_agent_planner}/SKILL.md +2 -2
- monoco/features/agent/resources/en/skills/{flow_reviewer → monoco_workflow_agent_reviewer}/SKILL.md +2 -2
- monoco/features/agent/resources/{roles/role-engineer.yaml → zh/roles/monoco_role_engineer.yaml} +3 -3
- monoco/features/agent/resources/{roles/role-manager.yaml → zh/roles/monoco_role_manager.yaml} +8 -8
- monoco/features/agent/resources/{roles/role-planner.yaml → zh/roles/monoco_role_planner.yaml} +8 -8
- monoco/features/agent/resources/{roles/role-reviewer.yaml → zh/roles/monoco_role_reviewer.yaml} +8 -8
- monoco/{core/resources/zh/skills/monoco_core → features/agent/resources/zh/skills/monoco_atom_core}/SKILL.md +2 -2
- monoco/features/agent/resources/zh/skills/{flow_engineer → monoco_workflow_agent_engineer}/SKILL.md +2 -2
- monoco/features/agent/resources/zh/skills/{flow_manager → monoco_workflow_agent_manager}/SKILL.md +2 -2
- monoco/features/agent/resources/zh/skills/{flow_planner → monoco_workflow_agent_planner}/SKILL.md +2 -2
- monoco/features/agent/resources/zh/skills/{flow_reviewer → monoco_workflow_agent_reviewer}/SKILL.md +2 -2
- monoco/features/agent/session.py +59 -11
- monoco/features/artifact/__init__.py +0 -0
- monoco/features/artifact/adapter.py +33 -0
- monoco/features/artifact/resources/zh/AGENTS.md +14 -0
- monoco/features/artifact/resources/zh/skills/monoco_atom_artifact/SKILL.md +278 -0
- monoco/features/glossary/adapter.py +18 -7
- monoco/features/glossary/resources/en/skills/{monoco_glossary → monoco_atom_glossary}/SKILL.md +2 -2
- monoco/features/glossary/resources/zh/skills/{monoco_glossary → monoco_atom_glossary}/SKILL.md +2 -2
- monoco/features/hooks/__init__.py +11 -0
- monoco/features/hooks/adapter.py +67 -0
- monoco/features/hooks/commands.py +309 -0
- monoco/features/hooks/core.py +441 -0
- monoco/features/hooks/resources/ADDING_HOOKS.md +234 -0
- monoco/features/i18n/adapter.py +18 -5
- monoco/features/i18n/core.py +482 -17
- monoco/features/i18n/resources/en/skills/{monoco_i18n → monoco_atom_i18n}/SKILL.md +2 -2
- monoco/features/i18n/resources/en/skills/{i18n_scan_workflow → monoco_workflow_i18n_scan}/SKILL.md +2 -2
- monoco/features/i18n/resources/zh/skills/{monoco_i18n → monoco_atom_i18n}/SKILL.md +2 -2
- monoco/features/i18n/resources/zh/skills/{i18n_scan_workflow → monoco_workflow_i18n_scan}/SKILL.md +2 -2
- monoco/features/issue/adapter.py +19 -6
- monoco/features/issue/commands.py +281 -7
- monoco/features/issue/core.py +227 -13
- monoco/features/issue/engine/machine.py +114 -4
- monoco/features/issue/linter.py +60 -5
- monoco/features/issue/models.py +2 -2
- monoco/features/issue/resources/en/AGENTS.md +109 -0
- monoco/features/issue/resources/en/skills/{monoco_issue → monoco_atom_issue}/SKILL.md +2 -2
- monoco/features/issue/resources/en/skills/{issue_create_workflow → monoco_workflow_issue_creation}/SKILL.md +2 -2
- monoco/features/issue/resources/en/skills/{issue_develop_workflow → monoco_workflow_issue_development}/SKILL.md +2 -2
- monoco/features/issue/resources/en/skills/{issue_lifecycle_workflow → monoco_workflow_issue_management}/SKILL.md +2 -2
- monoco/features/issue/resources/en/skills/{issue_refine_workflow → monoco_workflow_issue_refinement}/SKILL.md +2 -2
- monoco/features/issue/resources/hooks/post-checkout.sh +39 -0
- monoco/features/issue/resources/hooks/pre-commit.sh +41 -0
- monoco/features/issue/resources/hooks/pre-push.sh +35 -0
- monoco/features/issue/resources/zh/AGENTS.md +109 -0
- monoco/features/issue/resources/zh/skills/{monoco_issue → monoco_atom_issue_lifecycle}/SKILL.md +2 -2
- monoco/features/issue/resources/zh/skills/{issue_create_workflow → monoco_workflow_issue_creation}/SKILL.md +2 -2
- monoco/features/issue/resources/zh/skills/{issue_develop_workflow → monoco_workflow_issue_development}/SKILL.md +2 -2
- monoco/features/issue/resources/zh/skills/{issue_lifecycle_workflow → monoco_workflow_issue_management}/SKILL.md +2 -2
- monoco/features/issue/resources/zh/skills/{issue_refine_workflow → monoco_workflow_issue_refinement}/SKILL.md +2 -2
- monoco/features/issue/validator.py +101 -1
- monoco/features/memo/adapter.py +21 -8
- monoco/features/memo/cli.py +103 -10
- monoco/features/memo/core.py +178 -92
- monoco/features/memo/models.py +53 -0
- monoco/features/memo/resources/en/skills/{monoco_memo → monoco_atom_memo}/SKILL.md +2 -2
- monoco/features/memo/resources/en/skills/{note_processing_workflow → monoco_workflow_note_processing}/SKILL.md +2 -2
- monoco/features/memo/resources/zh/skills/{monoco_memo → monoco_atom_memo}/SKILL.md +2 -2
- monoco/features/memo/resources/zh/skills/{note_processing_workflow → monoco_workflow_note_processing}/SKILL.md +2 -2
- monoco/features/spike/adapter.py +18 -5
- monoco/features/spike/resources/en/skills/{monoco_spike → monoco_atom_spike}/SKILL.md +2 -2
- monoco/features/spike/resources/en/skills/{research_workflow → monoco_workflow_research}/SKILL.md +2 -2
- monoco/features/spike/resources/zh/skills/{monoco_spike → monoco_atom_spike}/SKILL.md +2 -2
- monoco/features/spike/resources/zh/skills/{research_workflow → monoco_workflow_research}/SKILL.md +2 -2
- monoco/main.py +38 -1
- {monoco_toolkit-0.3.10.dist-info → monoco_toolkit-0.3.11.dist-info}/METADATA +7 -1
- monoco_toolkit-0.3.11.dist-info/RECORD +181 -0
- monoco_toolkit-0.3.10.dist-info/RECORD +0 -156
- /monoco/{core → features/agent}/resources/en/AGENTS.md +0 -0
- /monoco/{core → features/agent}/resources/zh/AGENTS.md +0 -0
- {monoco_toolkit-0.3.10.dist-info → monoco_toolkit-0.3.11.dist-info}/WHEEL +0 -0
- {monoco_toolkit-0.3.10.dist-info → monoco_toolkit-0.3.11.dist-info}/entry_points.txt +0 -0
- {monoco_toolkit-0.3.10.dist-info → monoco_toolkit-0.3.11.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,436 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Conversion Worker for Monoco Mailroom.
|
|
3
|
+
|
|
4
|
+
Handles document conversion tasks using discovered tools.
|
|
5
|
+
Supports concurrent processing with asyncio.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import asyncio
|
|
11
|
+
import logging
|
|
12
|
+
import shutil
|
|
13
|
+
import subprocess
|
|
14
|
+
import tempfile
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from datetime import datetime, timezone
|
|
17
|
+
from enum import Enum
|
|
18
|
+
from pathlib import Path
|
|
19
|
+
from typing import Optional, Callable, Any
|
|
20
|
+
|
|
21
|
+
from .discovery import EnvironmentDiscovery, ToolCapability, ConversionTool
|
|
22
|
+
|
|
23
|
+
logger = logging.getLogger(__name__)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
class ConversionStatus(str, Enum):
    """Status of a conversion task.

    Members also subclass ``str``, so each member compares equal to its
    lowercase string value (for example ``ConversionStatus.FAILED == "failed"``).
    """
    # Not assigned anywhere in the visible module code; presumably the state
    # of a task that has been created but not yet started -- TODO confirm.
    PENDING = "pending"
    # Reported through the progress callback when a task starts processing.
    PROCESSING = "processing"
    # Set on results built by the tool-specific converters after the external
    # tool exits with return code 0.
    SUCCESS = "success"
    # Set on every result built by ConversionWorker._create_error_result.
    FAILED = "failed"
    # Not assigned anywhere in the visible module code; presumably reserved
    # for tasks stopped through cancellation -- TODO confirm.
    CANCELLED = "cancelled"
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass
class ConversionTask:
    """A single request to convert one source document.

    ``options`` defaults to a fresh empty dict per instance, and
    ``created_at`` defaults to the current UTC time at construction.
    """
    task_id: str
    source_path: Path
    target_format: str
    output_dir: Path
    options: dict[str, Any] = field(default_factory=dict)
    created_at: datetime = field(
        default_factory=lambda: datetime.now(tz=timezone.utc)
    )

    @property
    def source_extension(self) -> str:
        """Return the lowercased suffix of ``source_path`` (leading dot included)."""
        suffix = self.source_path.suffix
        return suffix.lower()
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@dataclass
class ConversionResult:
    """Result of a conversion operation.

    Instances are created both for successful conversions and, through
    ``ConversionWorker._create_error_result``, for failures.
    """
    # Identifier copied from the originating ConversionTask.
    task_id: str
    # Outcome of the conversion (SUCCESS or FAILED in the visible code paths).
    status: ConversionStatus
    # Location of the converted file; left as None on error results.
    output_path: Optional[Path] = None
    # Human-readable failure description; left as None on success results.
    error_message: Optional[str] = None
    # Elapsed processing time in milliseconds, filled in by the worker after
    # the conversion attempt; stays 0.0 when the worker does not set it.
    processing_time_ms: float = 0.0
    # Extra details; the converters store "tool" and "tool_version" here on
    # success. Defaults to a fresh empty dict per instance.
    metadata: dict[str, Any] = field(default_factory=dict)
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
class ConversionWorker:
    """
    Worker for processing document conversion tasks.

    Features:
    - Async processing with semaphore-controlled concurrency
    - Tool selection based on file type and capability
    - Automatic cleanup of temporary files
    - Progress callbacks
    """

    # File extension to required capability mapping.
    # Keys are lowercase suffixes including the leading dot; lookups are done
    # with the lowercased suffix of the source path. Legacy binary formats
    # (.doc, .xls, .ppt) are mapped to the same capability family as their
    # newer counterparts.
    EXTENSION_CAPABILITIES = {
        ".docx": ToolCapability.DOCX_TO_MD,
        ".doc": ToolCapability.DOCX_TO_TEXT,
        ".odt": ToolCapability.ODT_TO_TEXT,
        ".pdf": ToolCapability.PDF_TO_TEXT,
        ".xlsx": ToolCapability.XLSX_TO_CSV,
        ".xls": ToolCapability.XLSX_TO_CSV,
        ".pptx": ToolCapability.PPTX_TO_TEXT,
        ".ppt": ToolCapability.PPTX_TO_TEXT,
    }
|
|
84
|
+
|
|
85
|
+
def __init__(
    self,
    discovery: Optional[EnvironmentDiscovery] = None,
    max_concurrent: int = 4,
    timeout_seconds: float = 120.0,
):
    """
    Set up the worker and run tool discovery.

    Args:
        discovery: EnvironmentDiscovery to use; a new one is created when
            no (or a falsy) instance is supplied
        max_concurrent: Upper bound on conversions running at the same time
        timeout_seconds: Time limit applied to each external tool run
    """
    if not discovery:
        discovery = EnvironmentDiscovery()
    self.discovery = discovery
    # Discovery is always (re-)run, including for a caller-supplied instance.
    self.discovery.discover()

    self.max_concurrent = max_concurrent
    self.timeout_seconds = timeout_seconds
    # Bounds the number of tasks processed concurrently by submit().
    self._semaphore = asyncio.Semaphore(max_concurrent)
    self._active_tasks: dict[str, asyncio.Task] = {}

    # Optional observers; both stay unset until set_callbacks() is called.
    self._on_progress: Optional[Callable[[str, ConversionStatus, float], None]] = None
    self._on_complete: Optional[Callable[[ConversionResult], None]] = None
|
|
110
|
+
|
|
111
|
+
def set_callbacks(
    self,
    on_progress: Optional[Callable[[str, ConversionStatus, float], None]] = None,
    on_complete: Optional[Callable[[ConversionResult], None]] = None,
) -> None:
    """Install the progress and completion callbacks.

    Both callbacks are replaced on every call; an omitted argument resets
    the corresponding callback to None.
    """
    self._on_progress, self._on_complete = on_progress, on_complete
|
|
119
|
+
|
|
120
|
+
def _notify_progress(self, task_id: str, status: ConversionStatus, progress: float) -> None:
|
|
121
|
+
"""Notify progress callback."""
|
|
122
|
+
if self._on_progress:
|
|
123
|
+
try:
|
|
124
|
+
self._on_progress(task_id, status, progress)
|
|
125
|
+
except Exception:
|
|
126
|
+
pass
|
|
127
|
+
|
|
128
|
+
def _notify_complete(self, result: ConversionResult) -> None:
|
|
129
|
+
"""Notify completion callback."""
|
|
130
|
+
if self._on_complete:
|
|
131
|
+
try:
|
|
132
|
+
self._on_complete(result)
|
|
133
|
+
except Exception:
|
|
134
|
+
pass
|
|
135
|
+
|
|
136
|
+
def can_convert(self, file_path: Path) -> bool:
    """
    Tell whether a conversion tool is available for the given file.

    Args:
        file_path: Path of the candidate file; only its suffix is inspected

    Returns:
        False when the lowercased suffix is not in EXTENSION_CAPABILITIES,
        otherwise whatever the discovery reports for the mapped capability
    """
    suffix = file_path.suffix.lower()
    known = self.EXTENSION_CAPABILITIES
    if suffix in known:
        return self.discovery.has_capability(known[suffix])
    return False
|
|
152
|
+
|
|
153
|
+
def get_supported_extensions(self) -> list[str]:
    """Return the extensions whose mapped capability the discovery reports as available.

    The order follows the iteration order of EXTENSION_CAPABILITIES.
    """
    has_capability = self.discovery.has_capability
    return [
        extension
        for extension, required in self.EXTENSION_CAPABILITIES.items()
        if has_capability(required)
    ]
|
|
160
|
+
|
|
161
|
+
async def submit(self, task: ConversionTask) -> ConversionResult:
    """
    Submit a conversion task for processing.

    Waits for a free concurrency slot, then processes the task. While the
    task is being processed it is registered in ``_active_tasks`` under its
    ``task_id`` so that ``cancel_task`` can find it and ``get_active_count``
    reflects it; the registration is removed again when processing ends,
    however it ends.

    Args:
        task: The conversion task to process

    Returns:
        ConversionResult with status and output details

    Raises:
        asyncio.CancelledError: If the processing task is cancelled (for
            example through ``cancel_task``) before it finishes
    """
    async with self._semaphore:
        # Run the processing in its own Task so cancelling it does not
        # require cancelling the caller's task.
        runner = asyncio.create_task(self._process_task(task))
        self._active_tasks[task.task_id] = runner
        try:
            return await runner
        finally:
            # Only drop our own registration; a concurrent submission that
            # reused the same task_id may have replaced the entry.
            if self._active_tasks.get(task.task_id) is runner:
                del self._active_tasks[task.task_id]
|
|
173
|
+
|
|
174
|
+
async def submit_batch(
    self,
    tasks: list[ConversionTask],
) -> list[ConversionResult]:
    """
    Submit multiple tasks for concurrent processing.

    Args:
        tasks: List of conversion tasks

    Returns:
        List of ConversionResults in the same order as ``tasks``. A
        submission that ended with an exception (including cancellation)
        is represented by a failed result built with
        ``_create_error_result`` rather than by the exception object, so
        every element honours the declared return type.
    """
    outcomes = await asyncio.gather(
        *(self.submit(task) for task in tasks),
        return_exceptions=True,
    )

    results = []
    # gather() preserves input order, so outcomes line up with tasks.
    for task, outcome in zip(tasks, outcomes):
        # BaseException (not Exception) so that asyncio.CancelledError,
        # which gather() also returns in-band here, is covered as well.
        if isinstance(outcome, BaseException):
            detail = str(outcome) or type(outcome).__name__
            outcome = self._create_error_result(task, detail)
        results.append(outcome)
    return results
|
|
189
|
+
|
|
190
|
+
async def _process_task(self, task: ConversionTask) -> ConversionResult:
    """Process a single conversion task.

    Reports PROCESSING through the progress callback, runs the conversion,
    and then -- for every outcome, including validation failures -- records
    the elapsed time on the result and fires the completion callback.

    Args:
        task: The conversion task to process

    Returns:
        The conversion result; failures (missing source file, unsupported
        extension, no available tool, timeout, unexpected exception) are
        returned as FAILED results rather than raised.
    """
    import time

    # Monotonic clock: elapsed time must not be affected by system clock changes.
    started = time.monotonic()

    self._notify_progress(task.task_id, ConversionStatus.PROCESSING, 0.0)

    try:
        result = await self._resolve_and_convert(task)
    except asyncio.TimeoutError:
        result = self._create_error_result(task, "Conversion timeout")
    except Exception as e:
        logger.exception("Conversion failed for task %s", task.task_id)
        result = self._create_error_result(task, str(e))

    # Single exit path so that timing and the completion callback apply to
    # every result, not only to the ones that reached a conversion tool.
    result.processing_time_ms = (time.monotonic() - started) * 1000
    self._notify_complete(result)
    return result

async def _resolve_and_convert(self, task: ConversionTask) -> ConversionResult:
    """Validate the task, pick the best tool for its extension, and run it."""
    # Validate source file
    if not task.source_path.exists():
        return self._create_error_result(task, "Source file does not exist")

    # Get required capability
    ext = task.source_extension
    if ext not in self.EXTENSION_CAPABILITIES:
        return self._create_error_result(task, f"Unsupported file extension: {ext}")

    capability = self.EXTENSION_CAPABILITIES[ext]
    tool = self.discovery.get_best_tool(capability)
    if not tool:
        return self._create_error_result(
            task, f"No tool available for {capability.value}"
        )

    # Perform conversion
    return await self._convert_with_tool(task, tool, capability)
|
|
237
|
+
|
|
238
|
+
def _create_error_result(self, task: ConversionTask, message: str) -> ConversionResult:
    """Build a FAILED result for ``task`` carrying ``message`` as its error text."""
    failure = ConversionResult(
        task_id=task.task_id,
        status=ConversionStatus.FAILED,
    )
    failure.error_message = message
    return failure
|
|
245
|
+
|
|
246
|
+
async def _convert_with_tool(
|
|
247
|
+
self,
|
|
248
|
+
task: ConversionTask,
|
|
249
|
+
tool: ConversionTool,
|
|
250
|
+
capability: ToolCapability,
|
|
251
|
+
) -> ConversionResult:
|
|
252
|
+
"""Convert using the specified tool."""
|
|
253
|
+
|
|
254
|
+
if tool.tool_type.value == "libreoffice":
|
|
255
|
+
return await self._convert_with_libreoffice(task, tool)
|
|
256
|
+
elif tool.tool_type.value == "pandoc":
|
|
257
|
+
return await self._convert_with_pandoc(task, tool)
|
|
258
|
+
elif tool.tool_type.value in ("pdf2text", "pdftohtml"):
|
|
259
|
+
return await self._convert_with_pdf_tool(task, tool)
|
|
260
|
+
else:
|
|
261
|
+
return self._create_error_result(task, f"Unknown tool type: {tool.tool_type}")
|
|
262
|
+
|
|
263
|
+
async def _convert_with_libreoffice(
    self,
    task: ConversionTask,
    tool: ConversionTool,
) -> ConversionResult:
    """Convert using LibreOffice.

    Runs the tool headless with ``--convert-to txt:Text`` into a temporary
    directory, then moves ``<source stem>.txt`` into ``task.output_dir``.
    The temporary directory is removed when the method returns.

    Args:
        task: The conversion task (source path and output directory)
        tool: The LibreOffice tool to run

    Returns:
        A SUCCESS result pointing at the moved text file, or a FAILED
        result on non-zero exit, missing output file, or timeout.
        Errors while starting the process propagate to the caller.
    """
    # Create temp directory for conversion
    with tempfile.TemporaryDirectory() as tmpdir:
        tmp_path = Path(tmpdir)

        # Build LibreOffice command
        cmd = [
            str(tool.executable_path),
            "--headless",
            "--convert-to", "txt:Text",
            "--outdir", str(tmp_path),
            str(task.source_path),
        ]

        # Run conversion
        process = await asyncio.create_subprocess_exec(
            *cmd,
            stdout=asyncio.subprocess.PIPE,
            stderr=asyncio.subprocess.PIPE,
        )
        try:
            _stdout, stderr = await asyncio.wait_for(
                process.communicate(),
                timeout=self.timeout_seconds,
            )
        except asyncio.TimeoutError:
            return self._create_error_result(task, "LibreOffice conversion timeout")
        finally:
            # wait_for() only cancels communicate(); on timeout or task
            # cancellation the child would otherwise keep running.
            if process.returncode is None:
                try:
                    process.kill()
                except ProcessLookupError:
                    pass  # already exited
                await process.wait()

        if process.returncode != 0:
            # errors="replace": tool output is not guaranteed to be UTF-8
            # and a decode failure would hide the real error.
            error_msg = stderr.decode(errors="replace") if stderr else "Unknown error"
            return self._create_error_result(task, f"LibreOffice error: {error_msg}")

        # Find output file
        base_name = task.source_path.stem
        output_file = tmp_path / f"{base_name}.txt"

        if not output_file.exists():
            return self._create_error_result(task, "Output file not created")

        # Move to final destination (must happen before the temp dir is removed)
        task.output_dir.mkdir(parents=True, exist_ok=True)
        final_output = task.output_dir / f"{base_name}.txt"
        shutil.move(str(output_file), str(final_output))

        return ConversionResult(
            task_id=task.task_id,
            status=ConversionStatus.SUCCESS,
            output_path=final_output,
            metadata={
                "tool": tool.name,
                "tool_version": tool.version,
            },
        )
|
|
322
|
+
|
|
323
|
+
async def _convert_with_pandoc(
    self,
    task: ConversionTask,
    tool: ConversionTool,
) -> ConversionResult:
    """Convert using Pandoc.

    Writes Markdown (``-t markdown``) to ``<source stem>.md`` directly in
    ``task.output_dir``, creating the directory if needed.

    Args:
        task: The conversion task (source path and output directory)
        tool: The Pandoc tool to run

    Returns:
        A SUCCESS result pointing at the Markdown file, or a FAILED result
        on non-zero exit or timeout. Errors while starting the process
        propagate to the caller.
    """
    task.output_dir.mkdir(parents=True, exist_ok=True)

    base_name = task.source_path.stem
    output_file = task.output_dir / f"{base_name}.md"

    cmd = [
        str(tool.executable_path),
        str(task.source_path),
        "-o", str(output_file),
        "-t", "markdown",
    ]

    process = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    try:
        _stdout, stderr = await asyncio.wait_for(
            process.communicate(),
            timeout=self.timeout_seconds,
        )
    except asyncio.TimeoutError:
        return self._create_error_result(task, "Pandoc conversion timeout")
    finally:
        # wait_for() only cancels communicate(); on timeout or task
        # cancellation the child would otherwise keep running.
        if process.returncode is None:
            try:
                process.kill()
            except ProcessLookupError:
                pass  # already exited
            await process.wait()

    if process.returncode != 0:
        # errors="replace": tool output is not guaranteed to be UTF-8 and a
        # decode failure would hide the real error.
        error_msg = stderr.decode(errors="replace") if stderr else "Unknown error"
        return self._create_error_result(task, f"Pandoc error: {error_msg}")

    return ConversionResult(
        task_id=task.task_id,
        status=ConversionStatus.SUCCESS,
        output_path=output_file,
        metadata={
            "tool": tool.name,
            "tool_version": tool.version,
        },
    )
|
|
368
|
+
|
|
369
|
+
async def _convert_with_pdf_tool(
    self,
    task: ConversionTask,
    tool: ConversionTool,
) -> ConversionResult:
    """Convert PDF using pdftotext or similar.

    Invokes the tool as ``<executable> <source> <output>`` and writes to
    ``<source stem>.txt`` in ``task.output_dir``, creating the directory if
    needed. ``-layout`` is added when the tool's name contains "pdftotext".

    Args:
        task: The conversion task (source path and output directory)
        tool: The PDF tool to run

    Returns:
        A SUCCESS result pointing at the text file, or a FAILED result on
        non-zero exit or timeout. Errors while starting the process
        propagate to the caller.
    """
    task.output_dir.mkdir(parents=True, exist_ok=True)

    base_name = task.source_path.stem
    output_file = task.output_dir / f"{base_name}.txt"

    cmd = [
        str(tool.executable_path),
        str(task.source_path),
        str(output_file),
    ]

    # Add layout preservation for pdftotext (option goes right after the executable)
    if "pdftotext" in tool.name:
        cmd.insert(1, "-layout")

    process = await asyncio.create_subprocess_exec(
        *cmd,
        stdout=asyncio.subprocess.PIPE,
        stderr=asyncio.subprocess.PIPE,
    )
    try:
        _stdout, stderr = await asyncio.wait_for(
            process.communicate(),
            timeout=self.timeout_seconds,
        )
    except asyncio.TimeoutError:
        return self._create_error_result(task, "PDF conversion timeout")
    finally:
        # wait_for() only cancels communicate(); on timeout or task
        # cancellation the child would otherwise keep running.
        if process.returncode is None:
            try:
                process.kill()
            except ProcessLookupError:
                pass  # already exited
            await process.wait()

    if process.returncode != 0:
        # errors="replace": tool output is not guaranteed to be UTF-8 and a
        # decode failure would hide the real error.
        error_msg = stderr.decode(errors="replace") if stderr else "Unknown error"
        return self._create_error_result(task, f"PDF tool error: {error_msg}")

    return ConversionResult(
        task_id=task.task_id,
        status=ConversionStatus.SUCCESS,
        output_path=output_file,
        metadata={
            "tool": tool.name,
            "tool_version": tool.version,
        },
    )
|
|
417
|
+
|
|
418
|
+
async def cancel_task(self, task_id: str) -> bool:
    """
    Request cancellation of a registered task.

    Args:
        task_id: ID of the task to cancel

    Returns:
        True when an entry for ``task_id`` was found in ``_active_tasks``
        and its ``cancel()`` was called, False when no such entry exists
    """
    try:
        handle = self._active_tasks[task_id]
    except KeyError:
        return False
    handle.cancel()
    return True
|
|
433
|
+
|
|
434
|
+
def get_active_count(self) -> int:
    """Return how many entries are currently held in ``_active_tasks``."""
    registered = self._active_tasks
    return len(registered)
|