aimd-cli 0.9.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- aimd/AGENTS.md +68 -0
- aimd/__init__.py +13 -0
- aimd/adapters/AGENTS.md +23 -0
- aimd/adapters/__init__.py +1 -0
- aimd/adapters/cli/__init__.py +1 -0
- aimd/adapters/cli/app.py +216 -0
- aimd/application/AGENTS.md +31 -0
- aimd/application/__init__.py +14 -0
- aimd/application/bootstrap.py +51 -0
- aimd/application/models.py +43 -0
- aimd/application/services/__init__.py +1 -0
- aimd/application/services/interface_payloads.py +81 -0
- aimd/application/services/output_writer.py +68 -0
- aimd/application/use_cases/__init__.py +1 -0
- aimd/application/use_cases/input_routing.py +51 -0
- aimd/application/use_cases/list_engines.py +34 -0
- aimd/application/use_cases/process_input.py +40 -0
- aimd/application/use_cases/processors/__init__.py +13 -0
- aimd/application/use_cases/processors/_base.py +17 -0
- aimd/application/use_cases/processors/convert.py +35 -0
- aimd/application/use_cases/processors/transcript.py +92 -0
- aimd/cli.py +9 -0
- aimd/const.py +31 -0
- aimd/errors.py +41 -0
- aimd/infrastructure/AGENTS.md +26 -0
- aimd/infrastructure/__init__.py +1 -0
- aimd/infrastructure/documents/__init__.py +19 -0
- aimd/infrastructure/documents/chunking.py +168 -0
- aimd/infrastructure/documents/title_extractor.py +90 -0
- aimd/infrastructure/markitdown_processor.py +103 -0
- aimd/infrastructure/media_processor.py +51 -0
- aimd/platform_utils.py +26 -0
- aimd/py.typed +0 -0
- aimd/types.py +12 -0
- aimd/utils.py +70 -0
- aimd_cli-0.9.2.dist-info/METADATA +23 -0
- aimd_cli-0.9.2.dist-info/RECORD +39 -0
- aimd_cli-0.9.2.dist-info/WHEEL +4 -0
- aimd_cli-0.9.2.dist-info/entry_points.txt +3 -0
aimd/AGENTS.md
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# packages/aimd/src/aimd
|
|
2
|
+
|
|
3
|
+
Core package organized with ports/adapters architecture.
|
|
4
|
+
|
|
5
|
+
## STRUCTURE
|
|
6
|
+
|
|
7
|
+
- `application/` — use-cases, canonical request/response models, bootstrap wiring
|
|
8
|
+
- `infrastructure/` — MarkItDown runner, media package adapter, and Markdown chunking helpers
|
|
9
|
+
- `adapters/` — CLI interface adapter
|
|
10
|
+
- `cli.py` — runtime entrypoint
|
|
11
|
+
|
|
12
|
+
Feature packages live beside this package:
|
|
13
|
+
|
|
14
|
+
- `packages/aimd-media/` — yt-dlp URLs, subtitles, audio fallback, ASR plugin, and capability checks
|
|
15
|
+
- `packages/aimd-api/` — FastAPI service package
|
|
16
|
+
- `packages/aimd-mcp/` — MCP stdio server package
|
|
17
|
+
- `packages/aimd-book/` — MarkItDown plugin for ebook extraction and cleanup
|
|
18
|
+
- `packages/aimd-ocr/` — OCR plugin scaffold
|
|
19
|
+
- `packages/aimd-html/` — Defuddle CLI wrapper
|
|
20
|
+
|
|
21
|
+
## CONVENTIONS
|
|
22
|
+
|
|
23
|
+
- Keep orchestration in `application/use_cases/*`.
|
|
24
|
+
- Keep IO/third-party integrations in `infrastructure/*`.
|
|
25
|
+
- Keep interface-specific request/response mapping in `adapters/*`.
|
|
26
|
+
|
|
27
|
+
- Keep platform-dependent dependency use behind capability checks; `mlx-audio` is Darwin-only and Qwen3-ASR runs through Transformers on Linux/CUDA.
|
|
28
|
+
- Keep heavy local-file integrations behind MarkItDown plugins; keep this package as the facade/router and `TextContext` wrapper.
|
|
29
|
+
- Keep output file persistence in adapters via `application/services/output_writer.py`; `ProcessInput` and `ProcessResult` do not carry `output_file`.
|
|
30
|
+
- Keep API/MCP payload mapping in `application/services/interface_payloads.py` as plain helpers with no FastAPI/MCP imports.
|
|
31
|
+
|
|
32
|
+
## BOOK CONVERSION
|
|
33
|
+
|
|
34
|
+
`aimd-book` owns ebook conversion through the MarkItDown plugin entry point. Core routing treats `.epub`, `.mobi`, and `.azw3` as book inputs via `BOOK_EXTENSIONS`, but the current book pipeline is EPUB-compatible ZIP/spine extraction. Add true non-EPUB handling inside `aimd-book` rather than special-casing it in `aimd.infrastructure` or adapters.
|
|
35
|
+
|
|
36
|
+
## TEMP DIRECTORY
|
|
37
|
+
|
|
38
|
+
All temporary file operations (audio downloads, ebook extraction) use
|
|
39
|
+
Python's `tempfile` module with a configurable base directory:
|
|
40
|
+
|
|
41
|
+
- **CLI**: `--temp-dir` option (also reads `AIMD_TEMP_DIR` env var)
|
|
42
|
+
- **MCP / HTTP**: reads `AIMD_TEMP_DIR` env var at request time
|
|
43
|
+
- **Default**: when unset, falls back to the system temp directory chosen by `tempfile` (typically `/tmp` on Linux)
|
|
44
|
+
|
|
45
|
+
In sandboxed environments where `/tmp` may not be writable, set `AIMD_TEMP_DIR`
|
|
46
|
+
to redirect temp I/O. The `temp_dir` field flows through `ProcessInput` →
|
|
47
|
+
use-cases → infrastructure functions via the `dir=` parameter of
|
|
48
|
+
`tempfile.TemporaryDirectory` and `tempfile.NamedTemporaryFile`. ASR temp files are implemented in `aimd-media`; ebook extraction temp files are implemented in `aimd-book`.
|
|
49
|
+
|
|
50
|
+
## SUBTITLE FORMATTING
|
|
51
|
+
|
|
52
|
+
URL-sourced subtitles (SRT/VTT/TTML) are simplified to plain text by default.
|
|
53
|
+
The `raw_transcript` field on `ProcessInput` (default `False`) controls this:
|
|
54
|
+
|
|
55
|
+
- **CLI**: `--raw-transcript` flag
|
|
56
|
+
- **HTTP API**: `raw_transcript` field in `ProcessRequest`
|
|
57
|
+
- **MCP**: `raw_transcript` parameter on `process_input` tool
|
|
58
|
+
|
|
59
|
+
The stripping is performed by `strip_subtitle_formatting()` in
|
|
60
|
+
`aimd_media.url.formatter`, applied in `aimd_media.url.processor` before
|
|
61
|
+
`format_content()` embeds the text into the markdown output.
|
|
62
|
+
|
|
63
|
+
## TRANSCRIPTION MODELS
|
|
64
|
+
|
|
65
|
+
- Engine names are fixed in `const.TRANSCRIPTION_ENGINES`: `auto`, `mlx`, `qwen`.
|
|
66
|
+
- `mlx` is implemented in `aimd_media.mlx_engine` and uses `mlx_audio.stt.load()` on Apple Silicon. The default remains `mlx-community/Qwen3-ASR-1.7B-4bit`; `const.MLX_AUDIO_MODELS` also tracks newer mlx-audio 0.4.4 STT IDs. Do not add forced-aligner models to this list unless the calling code also supplies reference text.
|
|
67
|
+
- `qwen` is implemented in `aimd_media.qwen_engine` and uses a direct Transformers backend on Linux/CUDA with `Qwen/Qwen3-ASR-1.7B` default and `Qwen/Qwen3-ASR-0.6B` as the lower-memory option.
|
|
68
|
+
- When no language is given, mlx Qwen3-ASR defaults to `Chinese`; other mlx-audio STT models keep their own default/auto language behavior.
|
aimd/__init__.py
ADDED
aimd/adapters/AGENTS.md
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
# packages/aimd/src/aimd/adapters
|
|
2
|
+
|
|
3
|
+
Interface adapters for the core CLI package.
|
|
4
|
+
|
|
5
|
+
## Responsibilities
|
|
6
|
+
|
|
7
|
+
- Parse interface-level input.
|
|
8
|
+
- Map payloads/options to `application.models.ProcessInput`.
|
|
9
|
+
- Map use-case output/errors to CLI output format.
|
|
10
|
+
- Expose engine/model choices without duplicating backend validation logic.
|
|
11
|
+
|
|
12
|
+
## Current Interfaces
|
|
13
|
+
|
|
14
|
+
- CLI (`cli/app.py`): `aimd <input_source>` with `--output`, `--engine`, `--model`, `--language`, `--save-original`, `--cookies`, `--cookies-from-browser`, `--log-level`, `--raw-transcript`, and `--temp-dir` / `AIMD_TEMP_DIR`.
|
|
15
|
+
- HTTP lives in the `aimd-api` package.
|
|
16
|
+
- MCP lives in the `aimd-mcp` package.
|
|
17
|
+
|
|
18
|
+
## Rules
|
|
19
|
+
|
|
20
|
+
- Do not implement core processing logic in adapters.
|
|
21
|
+
- Reuse shared output persistence helpers from `application/services`.
|
|
22
|
+
- Keep CLI option descriptions aligned with ASR plugin model constants and `ProcessInput`.
|
|
23
|
+
- Do not import infrastructure processing modules directly; adapters should call application use-cases.
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Core CLI adapter package."""
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""CLI adapter package."""
|
aimd/adapters/cli/app.py
ADDED
|
@@ -0,0 +1,216 @@
|
|
|
1
|
+
"""CLI adapter for aimd."""
|
|
2
|
+
|
|
3
|
+
import asyncio
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
import typer
|
|
8
|
+
from dotenv import load_dotenv
|
|
9
|
+
from logly import logger
|
|
10
|
+
|
|
11
|
+
from ...application.bootstrap import build_container
|
|
12
|
+
from ...application.models import ProcessInput
|
|
13
|
+
from ...application.services.output_writer import persist_output
|
|
14
|
+
from ...errors import AimdError
|
|
15
|
+
from ...utils import create_output_path_from_title
|
|
16
|
+
|
|
17
|
+
# Pull settings from a local .env file (if any) into the process environment.
load_dotenv()

# Start from a clean slate and install one INFO-level console sink.
# _configure_logging() rebuilds this sink once --log-level has been parsed.
logger.remove_all()
logger.configure(color=True, auto_sink=False)
logger.add(
    "console",
    filter_min_level="INFO",
    format="{time:YYYY-MM-DD HH:mm:ss} | {level: <8} | {module}:{function} - {message}",
)

# Typer application object; commands below register themselves on it.
app = typer.Typer(
    name="aimd",
    help="Context preparation tool for LLM workflows - Transcribe audio/video and convert documents",
    no_args_is_help=True,
)
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _configure_logging(log_level: str) -> None:
    """Validate ``log_level`` and rebuild the console log sink at that level.

    The level is matched case-insensitively. An unrecognised level prints an
    error message via ``typer.echo(..., err=True)`` and exits with status 1.
    """
    accepted_levels = ["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"]
    normalized = str(log_level).upper()

    if normalized not in accepted_levels:
        choices = ", ".join(accepted_levels)
        typer.echo(
            f"Error: Invalid log level '{log_level}'. Valid levels: {choices}",
            err=True,
        )
        raise typer.Exit(1)

    # Remove existing sinks first so the console sink below is the only one.
    logger.remove_all()
    logger.configure(color=True, auto_sink=False)
    logger.add(
        "console",
        filter_min_level=normalized,
        format="{time:YYYY-MM-DD HH:mm:ss} | {level: <8} | {module}:{function} - {message}",
    )

    if normalized == "DEBUG":
        logger.debug(f"Logging level configured to: {log_level}")
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@app.command()
def process(
    input_source: str = typer.Argument(
        ...,
        help="Audio file, video file, video URL, or document to process",
    ),
    output_file: Optional[Path] = typer.Option(
        None,
        "--output",
        "-o",
        help="Output file path. If not specified, auto-generated from input",
    ),
    transcribe_engine: str = typer.Option(
        "auto",
        "--engine",
        "-e",
        # "auto" is the default value, so it is listed with the explicit engines.
        help="Transcription engine: auto (default), "
        "mlx (Apple Silicon, mlx-audio STT), "
        "qwen (Linux/CUDA, Qwen3-ASR via Transformers).",
    ),
    model: Optional[str] = typer.Option(
        None,
        "--model",
        "-m",
        help="Model for transcription. mlx defaults to mlx-community/Qwen3-ASR-1.7B-4bit "
        "and also supports other documented mlx-audio STT model IDs. "
        "qwen supports Qwen/Qwen3-ASR-1.7B (default) or Qwen/Qwen3-ASR-0.6B.",
    ),
    language: Optional[str] = typer.Option(
        None,
        "--language",
        "-l",
        help="Language code for transcription (e.g., zh, en, ja).",
    ),
    save_original: Optional[Path] = typer.Option(
        None,
        "--save-original",
        "-s",
        help="Save original downloaded audio/video file to a path or directory.",
    ),
    cookies: Optional[Path] = typer.Option(
        None,
        "--cookies",
        "-c",
        help="Path to Netscape-format cookies file for URL extraction.",
    ),
    cookies_from_browser: Optional[str] = typer.Option(
        None,
        "--cookies-from-browser",
        help="Browser cookie source for URL extraction.",
    ),
    log_level: str = typer.Option(
        "INFO",
        "--log-level",
        help="Logging level: DEBUG, INFO, WARNING, ERROR, CRITICAL",
    ),
    raw_transcript: bool = typer.Option(
        False,
        "--raw-transcript",
        help="Preserve original subtitle formatting (SRT/VTT timestamps). "
        "By default, subtitles are simplified to plain text.",
    ),
    temp_dir: Optional[Path] = typer.Option(
        None,
        "--temp-dir",
        help="Custom temporary directory for intermediate files. "
        "Overrides AIMD_TEMP_DIR env var. Useful for sandboxed environments.",
        envvar="AIMD_TEMP_DIR",
    ),
) -> None:
    """Process audio/video/url/doc inputs to markdown."""
    # NOTE: Typer shows the docstring above as the command's help text, so it
    # is kept short; implementation notes live in comments.
    #
    # Flow: configure logging -> classify the input -> run the use-case ->
    # write the result to disk (unless the processor already produced an
    # output directory) -> report the output location. Any failure is
    # reported and turned into exit status 1.
    _configure_logging(log_level)
    container = build_container()

    # Classify the input first so unsupported or missing sources fail before
    # any processing starts.
    try:
        route = container.process_input_use_case.ensure_supported_input(input_source)
    except AimdError as e:
        typer.echo(f"Error: {e}", err=True)
        # Chain the cause so the domain error is not lost from the traceback.
        raise typer.Exit(1) from e
    task_type = route.task_type

    logger.info(f"Input: {input_source}")
    logger.info(f"Source: {route.source_kind}")
    logger.info(f"Task: {task_type}")
    if task_type == "transcript":
        logger.info(f"Transcription Engine: {transcribe_engine}")

    async def run_processing() -> None:
        try:
            resolved_temp_dir = temp_dir
            if resolved_temp_dir is not None:
                # Create the directory eagerly so later temp-file creation
                # under it does not fail on a missing parent.
                resolved_temp_dir.mkdir(parents=True, exist_ok=True)
                logger.info(f"Using custom temp directory: {resolved_temp_dir}")

            result = await container.process_input_use_case.execute(
                ProcessInput(
                    input_source=input_source,
                    transcribe_engine=transcribe_engine,
                    model=model,
                    language=language,
                    save_original=save_original,
                    cookies=cookies,
                    cookies_from_browser=cookies_from_browser,
                    temp_dir=resolved_temp_dir,
                    raw_transcript=raw_transcript,
                )
            )

            if result.platform:
                logger.info(f"Platform: {result.platform}")

            # A convert result that carries an output directory is only
            # reported here; no single output file is written for it.
            if result.task_type == "convert" and result.output_dir is not None:
                input_path = Path(input_source)
                logger.info(f"Book converted with images to: {result.output_dir}")
                logger.info(f"Main file: {result.output_dir / f'{input_path.stem}.md'}")
                logger.info(f"Images extracted to: {result.output_dir / 'images'}")
                typer.echo("Successfully converted book with images")
                typer.echo(f"Output saved to {result.output_dir}")
                return

            final_output_file = output_file
            if final_output_file is None:
                # No --output given: derive a file name from the result title.
                # Transcripts go to the current directory, conversions next to
                # the input file.
                suffix = (
                    "transcript" if result.task_type == "transcript" else "converted"
                )
                default_dir = (
                    Path.cwd()
                    if result.task_type == "transcript"
                    else Path(input_source).parent
                )
                final_output_file = create_output_path_from_title(
                    result.text_context.title,
                    suffix,
                    default_dir,
                )

            logger.info(f"Output: {final_output_file}")
            persist_output(
                final_output_file,
                result.task_type,
                result.text_context.chunk_list,
            )

            if result.task_type == "transcript":
                logger.info(f"Transcript saved to: {final_output_file}")
                typer.echo("Successfully transcribed")
            else:
                logger.info(f"Converted file saved to: {final_output_file}")
                typer.echo("Successfully converted")
            typer.echo(f"Output saved to {final_output_file}")
        except AimdError as e:
            logger.error(str(e))
            raise typer.Exit(1) from e
        except Exception as e:
            # Top-level CLI boundary: report any unexpected failure and exit
            # non-zero instead of showing a raw traceback.
            task_name = "Transcription" if task_type == "transcript" else "Conversion"
            logger.error(f"{task_name} failed: {e}")
            raise typer.Exit(1) from e

    asyncio.run(run_processing())
|
|
212
|
+
|
|
213
|
+
|
|
214
|
+
def main() -> None:
    """Invoke the Typer application; used as the CLI entry point."""
    app()
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
# packages/aimd/src/aimd/application
|
|
2
|
+
|
|
3
|
+
Application layer: orchestration and canonical models.
|
|
4
|
+
|
|
5
|
+
## Responsibilities
|
|
6
|
+
|
|
7
|
+
- Define request/response models used across interfaces.
|
|
8
|
+
- Implement use-cases (`process_input`, `input_routing`, `list_engines`) and task processors under `use_cases/processors/`.
|
|
9
|
+
- Wire dependencies explicitly in `bootstrap.py`.
|
|
10
|
+
- Keep output persistence in `services/output_writer.py` so CLI/API/MCP share identical file behavior.
|
|
11
|
+
|
|
12
|
+
## Current Flow
|
|
13
|
+
|
|
14
|
+
- `ProcessInput` carries `input_source`, engine/model/language, URL cookie options, `save_original`, `raw_transcript`, and `temp_dir`. It does not carry an output path; adapters persist output through `services/output_writer.py`.
|
|
15
|
+
- `input_routing.py` returns `InputRoute(source_kind, task_type)`.
|
|
16
|
+
- `source_kind` describes what the user supplied: `url`, `audio_file`, `video_file`, `document_file`, or `unknown`.
|
|
17
|
+
- `task_type` describes which processing task runs: currently `transcript` or `convert`.
|
|
18
|
+
- `process_input.py` is the facade/router: it dispatches `InputRoute` to configured `TaskProcessor` objects.
|
|
19
|
+
- `processors/transcript.py` owns URL/audio transcript flow and resolves transcription engines early.
|
|
20
|
+
- `processors/convert.py` owns local document conversion flow through MarkItDown.
|
|
21
|
+
- `ListEnginesUseCase` returns capability detector output for adapter `/v1/engines` and MCP `list_engines`.
|
|
22
|
+
|
|
23
|
+
## Rules
|
|
24
|
+
|
|
25
|
+
- No direct adapter imports.
|
|
26
|
+
- Keep business flow in use-cases; avoid embedding third-party integration logic here.
|
|
27
|
+
- Keep each task implementation in its own processor module; local file conversion should call the MarkItDown runner rather than feature package internals.
|
|
28
|
+
- Route by the pair `(source_kind, task_type)`; avoid collapsing source classification into task naming.
|
|
29
|
+
- Keep dependency registration explicit in `bootstrap.py`; do not add a custom aimd plugin registry or priority routing until requirements justify it.
|
|
30
|
+
- Do not hard-code platform/model availability in application code; use infrastructure capability detection and constants.
|
|
31
|
+
- Preserve the `TextContext(title, chunk_list, split_header_level)` contract across interfaces.
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Application layer package."""
|
|
2
|
+
|
|
3
|
+
from .bootstrap import AppContainer, build_container
|
|
4
|
+
from .models import InputRoute, ProcessInput, ProcessResult, SourceKind, TaskType
|
|
5
|
+
|
|
6
|
+
# Public names re-exported by the application package.
__all__ = [
    "AppContainer",
    "InputRoute",
    "ProcessInput",
    "ProcessResult",
    "SourceKind",
    "TaskType",
    "build_container",
]
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""Application bootstrap and explicit dependency wiring."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
|
|
5
|
+
from aimd_media import (
|
|
6
|
+
get_engine_capabilities,
|
|
7
|
+
resolve_engine_with_preflight,
|
|
8
|
+
)
|
|
9
|
+
|
|
10
|
+
from ..infrastructure.markitdown_processor import (
|
|
11
|
+
convert_file_with_markitdown,
|
|
12
|
+
is_supported_file,
|
|
13
|
+
)
|
|
14
|
+
from ..infrastructure.media_processor import get_text_context_from_media_url
|
|
15
|
+
from .use_cases.list_engines import ListEnginesUseCase
|
|
16
|
+
from .use_cases.process_input import ProcessInputUseCase
|
|
17
|
+
from .use_cases.processors import ConvertTaskProcessor, TranscriptTaskProcessor
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@dataclass(slots=True)
class AppContainer:
    """Bundle of wired use-case objects that adapters obtain from bootstrap."""

    # Use-case that validates, routes and executes a single input.
    process_input_use_case: ProcessInputUseCase
    # Use-case that reports transcription engine capabilities.
    list_engines_use_case: ListEnginesUseCase
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def build_container() -> AppContainer:
    """Wire infrastructure callables into the use-cases and return the container."""
    # One processor per task type; the keys match the TaskType literals.
    processors = {
        "transcript": TranscriptTaskProcessor(
            process_url=get_text_context_from_media_url,
            process_file=convert_file_with_markitdown,
            resolve_engine=resolve_engine_with_preflight,
        ),
        "convert": ConvertTaskProcessor(
            process_file=convert_file_with_markitdown,
        ),
    }

    process_input = ProcessInputUseCase(
        processors=processors,
        is_supported_file=is_supported_file,
    )
    list_engines = ListEnginesUseCase(
        get_capabilities=get_engine_capabilities,
        resolve_engine=resolve_engine_with_preflight,
    )

    return AppContainer(
        process_input_use_case=process_input,
        list_engines_use_case=list_engines,
    )
|
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
"""Application-level request/response models."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Literal
|
|
6
|
+
|
|
7
|
+
from ..types import TextContext
|
|
8
|
+
|
|
9
|
+
# Classification of what the caller supplied as input.
SourceKind = Literal["url", "audio_file", "video_file", "document_file", "unknown"]
|
|
10
|
+
# Processing task that can be selected for an input.
TaskType = Literal["transcript", "convert"]
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
@dataclass(frozen=True, slots=True)
class InputRoute:
    """Immutable pairing of an input's source kind with its selected task."""

    # What kind of source was supplied.
    source_kind: SourceKind
    # Task chosen for the source; None when no task was selected.
    task_type: TaskType | None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass(slots=True)
|
|
22
|
+
class ProcessInput:
|
|
23
|
+
"""Canonical process request model consumed by use-cases."""
|
|
24
|
+
|
|
25
|
+
input_source: str
|
|
26
|
+
transcribe_engine: str = "auto"
|
|
27
|
+
model: str | None = None
|
|
28
|
+
language: str | None = None
|
|
29
|
+
save_original: Path | None = None
|
|
30
|
+
cookies: Path | None = None
|
|
31
|
+
cookies_from_browser: str | None = None
|
|
32
|
+
temp_dir: Path | None = None
|
|
33
|
+
raw_transcript: bool = False
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass(slots=True)
class ProcessResult:
    """Response model returned by the processing use-cases."""

    # Task that produced this result.
    task_type: TaskType
    # Extracted content (title, chunk list, split header level).
    text_context: TextContext
    # Directory of produced output, when the processor wrote one; else None.
    output_dir: Path | None = None
    # Optional platform label reported by the processor
    # (presumably the hosting platform for URL inputs; confirm against producers).
    platform: str | None = None
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Application service helpers."""
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""Shared adapter mapping helpers."""
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from ..models import ProcessInput, ProcessResult
|
|
8
|
+
from ..use_cases.list_engines import ListEnginesResult
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def get_request_temp_dir() -> Path | None:
|
|
12
|
+
"""Read and prepare AIMD_TEMP_DIR at request time."""
|
|
13
|
+
env_temp_dir = os.environ.get("AIMD_TEMP_DIR")
|
|
14
|
+
if not env_temp_dir:
|
|
15
|
+
return None
|
|
16
|
+
|
|
17
|
+
temp_dir = Path(env_temp_dir)
|
|
18
|
+
temp_dir.mkdir(parents=True, exist_ok=True)
|
|
19
|
+
return temp_dir
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def build_process_input(
    *,
    input_source: str,
    transcribe_engine: str = "auto",
    model: str | None = None,
    language: str | None = None,
    save_original: str | Path | None = None,
    cookies: str | Path | None = None,
    cookies_from_browser: str | None = None,
    raw_transcript: bool = False,
    temp_dir: Path | None = None,
) -> ProcessInput:
    """Assemble a ``ProcessInput`` from loosely typed adapter values.

    ``save_original`` and ``cookies`` may arrive as strings; truthy values are
    converted to ``Path`` and falsy values (``None`` or ``""``) become ``None``.
    All other arguments are passed through unchanged.
    """
    save_original_path = Path(save_original) if save_original else None
    cookies_path = Path(cookies) if cookies else None

    return ProcessInput(
        input_source=input_source,
        transcribe_engine=transcribe_engine,
        model=model,
        language=language,
        save_original=save_original_path,
        cookies=cookies_path,
        cookies_from_browser=cookies_from_browser,
        temp_dir=temp_dir,
        raw_transcript=raw_transcript,
    )
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
def engine_capabilities_payload(result: ListEnginesResult) -> dict[str, Any]:
    """Convert a ``ListEnginesResult`` into a JSON-friendly dictionary.

    The ``engines`` list always contains ``mlx`` then ``qwen``; each entry
    mirrors that engine's capability fields and marks whether it is the
    engine picked by auto-selection.
    """
    auto_engine = result.auto_selected_engine

    entries = []
    for name in ("mlx", "qwen"):
        capability = result.engines[name]
        entries.append(
            {
                "name": name,
                "available": capability.available,
                "reason": capability.reason,
                "fix_hint": capability.fix_hint,
                "selected_by_auto": name == auto_engine,
            }
        )

    return {
        "auto_selected_engine": auto_engine,
        "engines": entries,
    }
|
|
64
|
+
|
|
65
|
+
|
|
66
|
+
def process_result_payload(
    result: ProcessResult,
    *,
    output_file: str | None,
    output_dir: str | None,
) -> dict[str, Any]:
    """Flatten a ``ProcessResult`` plus output locations into a JSON-friendly dict.

    ``output_file`` and ``output_dir`` are supplied by the caller and copied
    into the payload as-is.
    """
    context = result.text_context
    payload: dict[str, Any] = {
        "task_type": result.task_type,
        "title": context.title,
        "chunk_list": context.chunk_list,
        "split_header_level": context.split_header_level,
        "platform": result.platform,
        "output_file": output_file,
        "output_dir": output_dir,
    }
    return payload
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
"""Shared output persistence helpers for adapters and use-cases."""
|
|
2
|
+
|
|
3
|
+
from dataclasses import dataclass
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
from typing import Literal
|
|
6
|
+
|
|
7
|
+
from ..models import ProcessResult
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def build_output_text(
    task_type: Literal["transcript", "convert"],
    chunk_list: list[str],
) -> str:
    """Return the markdown text to persist for a task's chunks.

    For ``"transcript"`` only the first chunk is used, and a missing or empty
    first chunk raises ``ValueError``. For any other task type the chunks are
    joined with a blank line between them.
    """
    if task_type != "transcript":
        return "\n\n".join(chunk_list)

    first_chunk = chunk_list[0] if chunk_list else ""
    if first_chunk:
        return first_chunk
    raise ValueError("Transcription returned empty content")
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def persist_output(
    output_file: Path,
    task_type: Literal["transcript", "convert"],
    chunk_list: list[str],
) -> Path:
    """Write the task output to ``output_file`` and return its resolved path.

    The text is built before anything is written, so a ``ValueError`` from
    ``build_output_text`` leaves the filesystem untouched. Missing parent
    directories are created, and the file is written as UTF-8.
    """
    content = build_output_text(task_type, chunk_list)

    parent_dir = output_file.parent
    parent_dir.mkdir(parents=True, exist_ok=True)

    output_file.write_text(content, encoding="utf-8")
    return output_file.resolve()
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@dataclass(slots=True, frozen=True)
|
|
37
|
+
class PersistedOutput:
|
|
38
|
+
"""Adapter-facing output locations after optional persistence."""
|
|
39
|
+
|
|
40
|
+
output_file: str | None
|
|
41
|
+
output_dir: str | None
|
|
42
|
+
ignored_output_file: bool = False
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def persist_result_output_if_requested(
    result: ProcessResult,
    requested_output_file: str | Path | None,
) -> PersistedOutput:
    """Write ``result`` to ``requested_output_file`` when that is applicable.

    Outcomes:
    - no file requested: nothing is written; only the result's output
      directory (if any) is reported;
    - file requested but the result already has an output directory: the
      request is ignored and flagged via ``ignored_output_file=True``;
    - otherwise the chunks are written and the resolved file path is reported.
    """
    has_output_dir = result.output_dir is not None
    output_dir = str(result.output_dir.resolve()) if has_output_dir else None

    if requested_output_file is None:
        return PersistedOutput(output_file=None, output_dir=output_dir)

    if has_output_dir:
        return PersistedOutput(
            output_file=None,
            output_dir=output_dir,
            ignored_output_file=True,
        )

    written_path = persist_output(
        Path(requested_output_file),
        result.task_type,
        result.text_context.chunk_list,
    )
    return PersistedOutput(output_file=str(written_path), output_dir=None)
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
"""Application use-cases."""
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
"""Input classification and support checks for processing use-cases."""
|
|
2
|
+
|
|
3
|
+
from pathlib import Path
|
|
4
|
+
from typing import Callable
|
|
5
|
+
|
|
6
|
+
from ...errors import InputNotFoundError, UnsupportedInputError
|
|
7
|
+
from ...utils import is_url
|
|
8
|
+
from ..models import InputRoute
|
|
9
|
+
|
|
10
|
+
from aimd_media.const import AUDIO_FILE_EXTENSIONS, VIDEO_FILE_EXTENSIONS
|
|
11
|
+
|
|
12
|
+
# Predicate that takes a path (str or Path) and returns a bool; used below to
# decide whether a local file is routed to the "convert" task.
FileSupportChecker = Callable[[str | Path], bool]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def get_input_route(
    input_source: str, is_supported_file: FileSupportChecker
) -> InputRoute:
    """Classify ``input_source`` and pick the task that should handle it.

    URLs are routed to ``transcript``. An existing path is routed by its
    lower-cased suffix: audio and video go to ``transcript``, and anything
    accepted by ``is_supported_file`` goes to ``convert``. Everything else,
    including paths that raise ``OSError``/``ValueError`` while being
    inspected, yields ``source_kind="unknown"`` with ``task_type=None``.
    """
    if is_url(input_source):
        return InputRoute(source_kind="url", task_type="transcript")

    unknown_route = InputRoute(source_kind="unknown", task_type=None)

    try:
        candidate = Path(input_source)
        if not candidate.exists():
            return unknown_route

        extension = candidate.suffix.lower()
        if extension in AUDIO_FILE_EXTENSIONS:
            return InputRoute(source_kind="audio_file", task_type="transcript")
        if extension in VIDEO_FILE_EXTENSIONS:
            return InputRoute(source_kind="video_file", task_type="transcript")
        if is_supported_file(candidate):
            return InputRoute(source_kind="document_file", task_type="convert")
    except (OSError, ValueError):
        # Best effort: a path that cannot be inspected is treated as unknown.
        return unknown_route

    return unknown_route
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
def ensure_supported_input(
    input_source: str, is_supported_file: FileSupportChecker
) -> InputRoute:
    """Return the route for ``input_source`` or raise a domain error.

    Raises:
        InputNotFoundError: the source is not a URL, has a file suffix, and
            does not exist on disk.
        UnsupportedInputError: no task could be selected for any other reason.
    """
    route = get_input_route(input_source, is_supported_file)
    if route.task_type is not None:
        return route

    candidate = Path(input_source)
    # A non-URL with a file suffix that does not exist is reported as missing
    # rather than unsupported.
    if not is_url(input_source) and candidate.suffix and not candidate.exists():
        raise InputNotFoundError(f"Input file not found: {input_source}")

    raise UnsupportedInputError(
        "Unsupported input source. Supported inputs: audio/video files, "
        "video URLs, and supported document files."
    )
|