markitai 0.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- markitai/__init__.py +3 -0
- markitai/batch.py +1316 -0
- markitai/cli.py +3979 -0
- markitai/config.py +602 -0
- markitai/config.schema.json +748 -0
- markitai/constants.py +222 -0
- markitai/converter/__init__.py +49 -0
- markitai/converter/_patches.py +98 -0
- markitai/converter/base.py +164 -0
- markitai/converter/image.py +181 -0
- markitai/converter/legacy.py +606 -0
- markitai/converter/office.py +526 -0
- markitai/converter/pdf.py +679 -0
- markitai/converter/text.py +63 -0
- markitai/fetch.py +1725 -0
- markitai/image.py +1335 -0
- markitai/json_order.py +550 -0
- markitai/llm.py +4339 -0
- markitai/ocr.py +347 -0
- markitai/prompts/__init__.py +159 -0
- markitai/prompts/cleaner.md +93 -0
- markitai/prompts/document_enhance.md +77 -0
- markitai/prompts/document_enhance_complete.md +65 -0
- markitai/prompts/document_process.md +60 -0
- markitai/prompts/frontmatter.md +28 -0
- markitai/prompts/image_analysis.md +21 -0
- markitai/prompts/image_caption.md +8 -0
- markitai/prompts/image_description.md +13 -0
- markitai/prompts/page_content.md +17 -0
- markitai/prompts/url_enhance.md +78 -0
- markitai/security.py +286 -0
- markitai/types.py +30 -0
- markitai/urls.py +187 -0
- markitai/utils/__init__.py +33 -0
- markitai/utils/executor.py +69 -0
- markitai/utils/mime.py +85 -0
- markitai/utils/office.py +262 -0
- markitai/utils/output.py +53 -0
- markitai/utils/paths.py +81 -0
- markitai/utils/text.py +359 -0
- markitai/workflow/__init__.py +37 -0
- markitai/workflow/core.py +760 -0
- markitai/workflow/helpers.py +509 -0
- markitai/workflow/single.py +369 -0
- markitai-0.3.0.dist-info/METADATA +159 -0
- markitai-0.3.0.dist-info/RECORD +48 -0
- markitai-0.3.0.dist-info/WHEEL +4 -0
- markitai-0.3.0.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
"""Text file converters (TXT, MD)."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
from typing import TYPE_CHECKING
|
|
7
|
+
|
|
8
|
+
from markitai.converter.base import (
|
|
9
|
+
BaseConverter,
|
|
10
|
+
ConvertResult,
|
|
11
|
+
FileFormat,
|
|
12
|
+
register_converter,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
if TYPE_CHECKING:
|
|
16
|
+
pass
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class TextConverter(BaseConverter):
    """Shared converter for inputs that are already plain text.

    The file content is passed through as the markdown payload; no images
    are produced.
    """

    def convert(
        self, input_path: Path, output_dir: Path | None = None
    ) -> ConvertResult:
        """Read a text file and wrap its content in a ConvertResult.

        Args:
            input_path: Path to the input file.
            output_dir: Unused for text files.

        Returns:
            ConvertResult whose ``markdown`` is the decoded file content,
            whose ``images`` list is empty, and whose ``metadata`` holds
            the source path and the upper-cased file extension ("TXT"
            when the path has no suffix).

        Raises:
            OSError: If the file cannot be opened or read.
            UnicodeDecodeError: If the file content is not valid UTF-8.
        """
        input_path = Path(input_path)

        # "utf-8-sig" decodes BOM-less UTF-8 exactly like "utf-8" but drops
        # a leading byte-order mark, so U+FEFF never leaks into the
        # markdown text handed to later processing steps.
        content = input_path.read_text(encoding="utf-8-sig")

        metadata = {
            "source": str(input_path),
            # Extension without the dot, upper-cased; "TXT" for no suffix.
            "format": input_path.suffix.lstrip(".").upper() or "TXT",
        }

        return ConvertResult(
            markdown=content,
            images=[],
            metadata=metadata,
        )
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
@register_converter(FileFormat.TXT)
class TxtConverter(TextConverter):
    """Plain-text (TXT) converter.

    Registered for ``FileFormat.TXT``; the ``convert`` implementation is
    inherited unchanged from ``TextConverter``.
    """

    # Formats this converter declares support for.
    supported_formats = [FileFormat.TXT]
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
@register_converter(FileFormat.MD)
class MarkdownConverter(TextConverter):
    """Markdown (MD) converter.

    Registered for ``FileFormat.MD``; the ``convert`` implementation is
    inherited unchanged from ``TextConverter``.
    """

    # Formats this converter declares support for.
    supported_formats = [FileFormat.MD]
|