aimd-cli 0.9.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. aimd/AGENTS.md +68 -0
  2. aimd/__init__.py +13 -0
  3. aimd/adapters/AGENTS.md +23 -0
  4. aimd/adapters/__init__.py +1 -0
  5. aimd/adapters/cli/__init__.py +1 -0
  6. aimd/adapters/cli/app.py +216 -0
  7. aimd/application/AGENTS.md +31 -0
  8. aimd/application/__init__.py +14 -0
  9. aimd/application/bootstrap.py +51 -0
  10. aimd/application/models.py +43 -0
  11. aimd/application/services/__init__.py +1 -0
  12. aimd/application/services/interface_payloads.py +81 -0
  13. aimd/application/services/output_writer.py +68 -0
  14. aimd/application/use_cases/__init__.py +1 -0
  15. aimd/application/use_cases/input_routing.py +51 -0
  16. aimd/application/use_cases/list_engines.py +34 -0
  17. aimd/application/use_cases/process_input.py +40 -0
  18. aimd/application/use_cases/processors/__init__.py +13 -0
  19. aimd/application/use_cases/processors/_base.py +17 -0
  20. aimd/application/use_cases/processors/convert.py +35 -0
  21. aimd/application/use_cases/processors/transcript.py +92 -0
  22. aimd/cli.py +9 -0
  23. aimd/const.py +31 -0
  24. aimd/errors.py +41 -0
  25. aimd/infrastructure/AGENTS.md +26 -0
  26. aimd/infrastructure/__init__.py +1 -0
  27. aimd/infrastructure/documents/__init__.py +19 -0
  28. aimd/infrastructure/documents/chunking.py +168 -0
  29. aimd/infrastructure/documents/title_extractor.py +90 -0
  30. aimd/infrastructure/markitdown_processor.py +103 -0
  31. aimd/infrastructure/media_processor.py +51 -0
  32. aimd/platform_utils.py +26 -0
  33. aimd/py.typed +0 -0
  34. aimd/types.py +12 -0
  35. aimd/utils.py +70 -0
  36. aimd_cli-0.9.2.dist-info/METADATA +23 -0
  37. aimd_cli-0.9.2.dist-info/RECORD +39 -0
  38. aimd_cli-0.9.2.dist-info/WHEEL +4 -0
  39. aimd_cli-0.9.2.dist-info/entry_points.txt +3 -0
@@ -0,0 +1,103 @@
1
+ """MarkItDown-backed local file conversion."""
2
+
3
+ import asyncio
4
+ from functools import partial
5
+ from pathlib import Path
6
+
7
+ from markitdown import MarkItDown
8
+
9
+ from ..const import BOOK_EXTENSIONS, MARKITDOWN_FILE_EXTENSIONS
10
+ from ..errors import InputNotFoundError, UnsupportedInputError
11
+ from ..types import TextContext
12
+ from .documents.chunking import (
13
+ combine_sections_for_processing,
14
+ split_markdown_by_headers,
15
+ )
16
+ from .documents.title_extractor import extract_title_from_content
17
+
18
+
19
+ def is_supported_file(file_path: str | Path) -> bool:
20
+ """Return whether a local file extension should be offered to MarkItDown."""
21
+ if isinstance(file_path, str) and file_path.startswith(("http://", "https://")):
22
+ return False
23
+ return Path(file_path).suffix.lower() in MARKITDOWN_FILE_EXTENSIONS
24
+
25
+
26
def _text_context_from_markdown(
    markdown: str,
    fallback_title: str,
    title: str | None,
    max_chunk_size: int,
) -> TextContext:
    """Wrap markdown text in aimd's TextContext, chunking it when too long.

    A truthy ``title`` is used as-is; otherwise the title comes from
    ``extract_title_from_content(markdown, fallback_title)``.
    """
    final_title = (
        title if title else extract_title_from_content(markdown, fallback_title)
    )
    body = markdown.strip()

    # Text that fits within the limit becomes a single chunk (none when blank).
    if len(body) <= max_chunk_size:
        single_chunk = [body] if body else []
        return TextContext(
            title=final_title,
            chunk_list=single_chunk,
            split_header_level=None,
        )

    # Oversized text: split on headers, drop sections that are blank after
    # stripping, then hand the rest to combine_sections_for_processing.
    raw_sections, level = split_markdown_by_headers(
        body,
        max_chunk_size=max_chunk_size,
    )
    non_empty = []
    for heading, text in raw_sections:
        trimmed = text.strip()
        if trimmed:
            non_empty.append((heading, trimmed))

    grouped = combine_sections_for_processing(non_empty, max_chunk_size)
    return TextContext(
        title=final_title,
        chunk_list=grouped,
        split_header_level=level,
    )
57
+
58
+
59
async def convert_file_with_markitdown(
    file_path: str | Path,
    transcribe_engine: str = "auto",
    language: str | None = None,
    model: str | None = None,
    temp_dir: Path | None = None,
    *,
    max_chunk_size: int = 40000,
) -> tuple[TextContext, Path | None]:
    """Run a local file through MarkItDown (plugins enabled) and wrap the result.

    Returns:
        The TextContext built from the converted markdown, plus the output
        directory passed to the converter (``None`` unless the file suffix is
        in ``BOOK_EXTENSIONS``).

    Raises:
        InputNotFoundError: the path does not exist.
        UnsupportedInputError: the path exists but is not a regular file.
    """
    source = Path(file_path)
    if not source.exists():
        raise InputNotFoundError(f"Input file not found: {source}")
    if not source.is_file():
        raise UnsupportedInputError(f"Path is not a file: {source}")

    # Book formats get a directory next to the input, named after its stem.
    book_dir = None
    if source.suffix.lower() in BOOK_EXTENSIONS:
        book_dir = source.parent / source.stem

    converter = MarkItDown(enable_plugins=True)
    convert_call = partial(
        converter.convert,
        source,
        transcribe_engine=transcribe_engine,
        language=language,
        model=model,
        temp_dir=temp_dir,
        output_dir=book_dir,
    )
    # The synchronous convert call is dispatched to the loop's default
    # executor (executor argument None) instead of running on the event loop.
    conversion = await asyncio.get_running_loop().run_in_executor(
        None, convert_call
    )

    context = _text_context_from_markdown(
        conversion.markdown,
        fallback_title=source.stem,
        title=conversion.title,
        max_chunk_size=max_chunk_size,
    )
    return context, book_dir
@@ -0,0 +1,51 @@
1
+ """aimd wrappers around the aimd-media package."""
2
+
3
+ from pathlib import Path
4
+
5
+ from aimd_media.errors import ProcessingFailedError as MediaProcessingFailedError
6
+ from aimd_media.errors import UnsupportedInputError as MediaUnsupportedInputError
7
+ from aimd_media.url import get_text_from_url
8
+
9
+ from ..errors import ProcessingFailedError, UnsupportedInputError
10
+ from ..types import TextContext
11
+ from .markitdown_processor import _text_context_from_markdown
12
+
13
+
14
async def get_text_context_from_media_url(
    url: str,
    transcribe_engine: str = "auto",
    language: str | None = None,
    model: str | None = None,
    save_original_path: Path | None = None,
    cookies_file: str | None = None,
    cookies_from_browser: str | None = None,
    temp_dir: Path | None = None,
    raw_transcript: bool = False,
    *,
    max_chunk_size: int = 40000,
) -> tuple[TextContext, str]:
    """Extract a media URL through aimd-media and wrap it as TextContext.

    All extraction options are forwarded unchanged to
    ``aimd_media.url.get_text_from_url``.

    Args:
        max_chunk_size: Chunk-size limit forwarded to
            ``_text_context_from_markdown``. Keyword-only; defaults to 40000,
            the value that was previously hard-coded, and mirrors the
            parameter of the same name on ``convert_file_with_markitdown``.

    Returns:
        The TextContext built from the extracted markdown, plus the
        ``platform`` attribute of the aimd-media result.

    Raises:
        UnsupportedInputError: aimd-media raised its own UnsupportedInputError.
        ProcessingFailedError: aimd-media raised its own ProcessingFailedError.
    """
    try:
        result = await get_text_from_url(
            url=url,
            transcribe_engine=transcribe_engine,
            language=language,
            model=model,
            save_original_path=save_original_path,
            cookies_file=cookies_file,
            cookies_from_browser=cookies_from_browser,
            temp_dir=temp_dir,
            raw_transcript=raw_transcript,
        )
    except MediaUnsupportedInputError as exc:
        # Re-raise as aimd's own error type, keeping the original as the cause.
        raise UnsupportedInputError(str(exc)) from exc
    except MediaProcessingFailedError as exc:
        raise ProcessingFailedError(str(exc)) from exc

    return (
        _text_context_from_markdown(
            result.markdown,
            fallback_title=result.title,
            title=result.title,
            max_chunk_size=max_chunk_size,
        ),
        result.platform,
    )
aimd/platform_utils.py ADDED
@@ -0,0 +1,26 @@
1
+ """Platform and hardware detection helpers."""
2
+
3
+ from functools import lru_cache
4
+ import platform
5
+ import subprocess
6
+
7
+
8
@lru_cache(maxsize=1)
def is_apple_silicon() -> bool:
    """Return True when running on Apple Silicon macOS.

    The answer comes from the CPU brand string reported by
    ``sysctl -n machdep.cpu.brand_string`` and is cached after the first call.
    Any failure to run ``sysctl`` is treated as "not Apple Silicon".
    """
    # Function-scope import: this module does not import ``re`` at file level.
    import re

    if platform.system() != "Darwin":
        return False
    try:
        result = subprocess.run(
            ["sysctl", "-n", "machdep.cpu.brand_string"],
            capture_output=True,
            text=True,
            check=True,
        )
    except (subprocess.SubprocessError, OSError):
        # OSError covers a missing binary (FileNotFoundError) as well as other
        # launch failures such as PermissionError.
        return False

    cpu_info = result.stdout.strip().lower()
    # Match "m" followed by a digit instead of a fixed m1..m4 list, so newer
    # M-series brand strings are recognised without editing this function.
    return "apple" in cpu_info and re.search(r"m\d", cpu_info) is not None
aimd/py.typed ADDED
File without changes
aimd/types.py ADDED
@@ -0,0 +1,12 @@
1
+ from pydantic import BaseModel, Field
2
+
3
+
4
class TextContext(BaseModel):
    """Context for text processing with title and content."""

    # Required: title associated with the text.
    title: str = Field(..., description="Title of the text")
    # Required: the text content as a list of chunk strings (may be empty).
    chunk_list: list[str] = Field(..., description="List of combined text chunks")
    # Optional: header level the text was split on; None when it was not split.
    split_header_level: int | None = Field(
        default=None,
        description="Header level used for splitting (1-6), None if no splitting was done",
    )
aimd/utils.py ADDED
@@ -0,0 +1,70 @@
1
+ import re
2
+ from pathlib import Path
3
+ from urllib.parse import urlparse
4
+
5
+
6
def sanitize_filename(title: str, max_length: int = 100) -> str:
    """Sanitize title for use as filename.

    Args:
        title: Title to sanitize
        max_length: Maximum length for filename

    Returns:
        Sanitized filename safe for filesystem; ``"output"`` when nothing
        usable is left after sanitizing
    """
    # Replace characters that are invalid in file names. The backslash is
    # written as ``\\`` so the class contains a literal backslash; a single
    # ``\|`` only escapes the pipe and lets backslashes through.
    sanitized = re.sub(r'[<>:"/\\|?*]', "_", title)
    # Trim outer whitespace, then collapse each inner run into one underscore
    sanitized = re.sub(r"\s+", "_", sanitized.strip())
    # Remove leading/trailing dots and underscores
    sanitized = sanitized.strip("._")
    # Limit length, re-trimming in case the cut ends on a dot or underscore
    if len(sanitized) > max_length:
        sanitized = sanitized[:max_length].rstrip("._")
    # Ensure we have a valid filename
    return sanitized or "output"
30
+
31
+
32
def create_output_path_from_title(
    title: str, template_name: str, current_dir: Path | None = None
) -> Path:
    """Create output path using title and template name.

    Args:
        title: Title from TextContext
        template_name: Template name for suffix
        current_dir: Directory to save file (defaults to current working directory)

    Returns:
        Output path with sanitized title and template suffix
    """
    # None means "use the working directory at call time".
    base_dir = Path.cwd() if current_dir is None else current_dir

    # Only the title is sanitized; template_name is used exactly as given.
    sanitized_title = sanitize_filename(title)
    return base_dir / f"{sanitized_title}_{template_name}.md"
51
+
52
+
53
def is_url(s: str) -> bool:
    """Check if a string is a URL using basic URL parsing.

    Args:
        s: String to check

    Returns:
        True if string appears to be a URL
    """
    try:
        parts = urlparse(s)
    except ValueError:
        # urlparse rejects some malformed inputs; those are not URLs.
        return False

    scheme, host = parts.scheme, parts.netloc
    # Any scheme together with a network location counts as a URL.
    if scheme and host:
        return True
    # An http/https scheme is also accepted when the network location is
    # empty. Input without a scheme (e.g. "www.google.com") is not a URL here.
    return scheme in ["http", "https"] and not host
@@ -0,0 +1,23 @@
1
+ Metadata-Version: 2.3
2
+ Name: aimd-cli
3
+ Version: 0.9.2
4
+ Summary: Context preparation tool for LLM workflows.
5
+ Author: Shu Li
6
+ Author-email: Shu Li <zetarylee@gmail.com>
7
+ Requires-Dist: aimd-media
8
+ Requires-Dist: aimd-book
9
+ Requires-Dist: logly>=0.1.6
10
+ Requires-Dist: markitdown>=0.1.1,<0.2.0
11
+ Requires-Dist: pydantic>=2.0.0
12
+ Requires-Dist: python-dotenv>=1.1.1
13
+ Requires-Dist: rich>=13.9.4
14
+ Requires-Dist: typer>=0.20.0
15
+ Requires-Dist: aimd-api ; extra == 'all'
16
+ Requires-Dist: aimd-mcp ; extra == 'all'
17
+ Requires-Dist: aimd-ocr ; extra == 'all'
18
+ Requires-Dist: aimd-html ; extra == 'all'
19
+ Requires-Python: >=3.10, <3.13
20
+ Provides-Extra: all
21
+ Description-Content-Type: text/markdown
22
+
23
+ Prepare LLM-ready context from URLs, audio/video, and documents.
@@ -0,0 +1,39 @@
1
+ aimd/AGENTS.md,sha256=qWZdW57zt9dmCADPdDbSjmxfyH7pWMeECtA3PUpuGq0,3954
2
+ aimd/__init__.py,sha256=mKPHHwULavRIzkNJ9htoTO9FO_8TgS__g6E0_BpaD0A,241
3
+ aimd/adapters/AGENTS.md,sha256=l1MEcwqziBKC28oXJsamh2vDTh3sdpKQbHSfbUXy84E,982
4
+ aimd/adapters/__init__.py,sha256=kPCiAvKFPgizgLXLTrC4RW1oZkbMRW5SFLoTnBCJHZA,32
5
+ aimd/adapters/cli/__init__.py,sha256=R6ccDx1BPyu31QO3lmw57_eCke4CkeAZAuEbGLDk96s,27
6
+ aimd/adapters/cli/app.py,sha256=-MS60tmFkGDJtd50CcxE16i2vAVq0br96xt5nlf2gsM,7465
7
+ aimd/application/AGENTS.md,sha256=LAeM0Jm2-HsAXJbNqqNO-wfX-CZ8elGrIXJn7KioUy4,2046
8
+ aimd/application/__init__.py,sha256=fmMLANSxZPj3RKJ4srXjHEDG0ZgDvcTjLeuv1TDpKi0,320
9
+ aimd/application/bootstrap.py,sha256=JJGZK-Il4vo5-cpQqCqZxwi9sa3Xn3GZujdHGTUKI6Q,1650
10
+ aimd/application/models.py,sha256=PGmEkiKaGLYz7EsOneWZioSAfaVR1A3NxNkJX4qcQKA,1122
11
+ aimd/application/services/__init__.py,sha256=7O6qLl42-prqJ_SujBpJffj3T9__V6pLXHu49TEw-E4,35
12
+ aimd/application/services/interface_payloads.py,sha256=IwJViNVoUu06FkVhETQ9lt59RFcIh1lm9LtGYO1Q1vE,2528
13
+ aimd/application/services/output_writer.py,sha256=PCoYRK-B-Ul6JoI1SAA7_YDYVgTc0_XMWbWytALrmUQ,2033
14
+ aimd/application/use_cases/__init__.py,sha256=Tv3oOFjorCm6Etffgl4HsIUrmgtqgc6-c_OLQxGa8Ow,29
15
+ aimd/application/use_cases/input_routing.py,sha256=7gvv03tSmvEz1oNr2AIU3BIOQPxsGwUXxI7dZkDw1G0,1946
16
+ aimd/application/use_cases/list_engines.py,sha256=QIVjRguTA2NKEWlitaBn9AN-BzW6g6_aweSqiTc7rKk,1000
17
+ aimd/application/use_cases/process_input.py,sha256=Q7UzOxmAj8Cl048o_uDpB0GECeEtm6cyzlc9c_FCxHY,1540
18
+ aimd/application/use_cases/processors/__init__.py,sha256=XXYSDDR3lYRxmyRSEw-lsB4xEejc7mVPg_cK0CPFgow,365
19
+ aimd/application/use_cases/processors/_base.py,sha256=1B5BjSzhrP6CNhDqVOcGfwj4dGooQHmOvqmRDAEifzA,415
20
+ aimd/application/use_cases/processors/convert.py,sha256=m5VBEMjmlVUjJDpwQGs477HdHRvne0uSFuNmqHMPRQ4,974
21
+ aimd/application/use_cases/processors/transcript.py,sha256=SMyX4H2nJZNM8V1Wotw-uQdgIgoVy1kIwPQGShgDV5U,2630
22
+ aimd/cli.py,sha256=iJYbgVudhscnwgFV51DgtVXP-uC_OtEnMqwSmMdUBHc,130
23
+ aimd/const.py,sha256=8ptoja8E7lgOdorDqgG3mF12oI81fykHrfhLaiOYxcg,724
24
+ aimd/errors.py,sha256=pjMg0qPUZmly7nvke0MDYoZGp0-Gm2PD3rfbdjHScsw,902
25
+ aimd/infrastructure/AGENTS.md,sha256=OHIS53QxMfo9bABNqsFo6PYfIuqKYrZNPYS5J6VYFaU,1782
26
+ aimd/infrastructure/__init__.py,sha256=D_StCHKRqyvxeKpP-_lrddfbBbJe75K86cT5biZvmcw,36
27
+ aimd/infrastructure/documents/__init__.py,sha256=efTOlhTS6Yl054gdPxPIKlyyRDTgbHiVrPvZkb4-Svk,534
28
+ aimd/infrastructure/documents/chunking.py,sha256=3e-JylggA0qxYlCFGsy2DNC9uSSSrUV_bwiTWtcmGRU,5542
29
+ aimd/infrastructure/documents/title_extractor.py,sha256=9dCjBoP-0PRRiXutTxyEnzMnxGoCbHy6rFZ5KL3W4UE,3215
30
+ aimd/infrastructure/markitdown_processor.py,sha256=K8jajNE27KdMToLLI3LRol_3zh6DuO4sdgeIMuiDRHE,3224
31
+ aimd/infrastructure/media_processor.py,sha256=lDCBJvx0cVL0N17f8OK9CjmXUirdsdnJA4LhqaM-gZ8,1745
32
+ aimd/platform_utils.py,sha256=uM-Mj2fXLpZVoh-r0Ywe8xYNKgo-oEyrD7DqIeBSWDg,713
33
+ aimd/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
+ aimd/types.py,sha256=OpedoG-3X5v-xIwKxKH6zxe0cebj6gzI2DafVVQFLNU,441
35
+ aimd/utils.py,sha256=lc0uQQVK3RSyN2fAXLyMU-Psbi3gWQT0lhj8fRFc7vA,2076
36
+ aimd_cli-0.9.2.dist-info/WHEEL,sha256=eh7sammvW2TypMMMGKgsM83HyA_3qQ5Lgg3ynoecH3M,79
37
+ aimd_cli-0.9.2.dist-info/entry_points.txt,sha256=Q5kTy2HVVf4rZGtsnhXxsabRQ8uX1ZloE0hIacd4fH0,40
38
+ aimd_cli-0.9.2.dist-info/METADATA,sha256=iuO6q7aLUaWhYqYNKTjjIdMU_u24A7Sd6tV5IbwOcIQ,727
39
+ aimd_cli-0.9.2.dist-info/RECORD,,
@@ -0,0 +1,4 @@
1
+ Wheel-Version: 1.0
2
+ Generator: uv 0.8.24
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
@@ -0,0 +1,3 @@
1
+ [console_scripts]
2
+ aimd = aimd.cli:main
3
+