markitai 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48):
  1. markitai/__init__.py +3 -0
  2. markitai/batch.py +1316 -0
  3. markitai/cli.py +3979 -0
  4. markitai/config.py +602 -0
  5. markitai/config.schema.json +748 -0
  6. markitai/constants.py +222 -0
  7. markitai/converter/__init__.py +49 -0
  8. markitai/converter/_patches.py +98 -0
  9. markitai/converter/base.py +164 -0
  10. markitai/converter/image.py +181 -0
  11. markitai/converter/legacy.py +606 -0
  12. markitai/converter/office.py +526 -0
  13. markitai/converter/pdf.py +679 -0
  14. markitai/converter/text.py +63 -0
  15. markitai/fetch.py +1725 -0
  16. markitai/image.py +1335 -0
  17. markitai/json_order.py +550 -0
  18. markitai/llm.py +4339 -0
  19. markitai/ocr.py +347 -0
  20. markitai/prompts/__init__.py +159 -0
  21. markitai/prompts/cleaner.md +93 -0
  22. markitai/prompts/document_enhance.md +77 -0
  23. markitai/prompts/document_enhance_complete.md +65 -0
  24. markitai/prompts/document_process.md +60 -0
  25. markitai/prompts/frontmatter.md +28 -0
  26. markitai/prompts/image_analysis.md +21 -0
  27. markitai/prompts/image_caption.md +8 -0
  28. markitai/prompts/image_description.md +13 -0
  29. markitai/prompts/page_content.md +17 -0
  30. markitai/prompts/url_enhance.md +78 -0
  31. markitai/security.py +286 -0
  32. markitai/types.py +30 -0
  33. markitai/urls.py +187 -0
  34. markitai/utils/__init__.py +33 -0
  35. markitai/utils/executor.py +69 -0
  36. markitai/utils/mime.py +85 -0
  37. markitai/utils/office.py +262 -0
  38. markitai/utils/output.py +53 -0
  39. markitai/utils/paths.py +81 -0
  40. markitai/utils/text.py +359 -0
  41. markitai/workflow/__init__.py +37 -0
  42. markitai/workflow/core.py +760 -0
  43. markitai/workflow/helpers.py +509 -0
  44. markitai/workflow/single.py +369 -0
  45. markitai-0.3.0.dist-info/METADATA +159 -0
  46. markitai-0.3.0.dist-info/RECORD +48 -0
  47. markitai-0.3.0.dist-info/WHEEL +4 -0
  48. markitai-0.3.0.dist-info/entry_points.txt +2 -0
@@ -0,0 +1,63 @@
1
+ """Text file converters (TXT, MD)."""
2
+
3
+ from __future__ import annotations
4
+
5
+ from pathlib import Path
6
+ from typing import TYPE_CHECKING
7
+
8
+ from markitai.converter.base import (
9
+ BaseConverter,
10
+ ConvertResult,
11
+ FileFormat,
12
+ register_converter,
13
+ )
14
+
15
+ if TYPE_CHECKING:
16
+ pass
17
+
18
+
19
class TextConverter(BaseConverter):
    """Base converter for plain text files.

    The decoded file content is passed through as the markdown output;
    no images are produced.
    """

    def convert(
        self, input_path: Path, output_dir: Path | None = None
    ) -> ConvertResult:
        """Read a text file and return its content as markdown.

        Args:
            input_path: Path to the input file.
            output_dir: Unused for text files.

        Returns:
            ConvertResult whose ``markdown`` is the decoded file content,
            with an empty image list and ``source``/``format`` metadata.

        Raises:
            OSError: If the file cannot be opened or read.
            UnicodeDecodeError: If the file content is not valid UTF-8.
        """
        input_path = Path(input_path)

        # "utf-8-sig" decodes BOM-less UTF-8 exactly like "utf-8" but drops a
        # leading byte-order mark, so the markdown never starts with a stray
        # U+FEFF character.
        content = input_path.read_text(encoding="utf-8-sig")

        metadata = {
            "source": str(input_path),
            # Files without an extension are reported as plain text.
            "format": input_path.suffix.lstrip(".").upper() or "TXT",
        }

        return ConvertResult(
            markdown=content,
            images=[],
            metadata=metadata,
        )
50
+
51
+
52
@register_converter(FileFormat.TXT)
class TxtConverter(TextConverter):
    """Converter registered for ``FileFormat.TXT``.

    All conversion behavior is inherited from ``TextConverter``.
    """

    # Formats this converter declares support for.
    supported_formats = [FileFormat.TXT]
57
+
58
+
59
@register_converter(FileFormat.MD)
class MarkdownConverter(TextConverter):
    """Converter registered for ``FileFormat.MD``.

    All conversion behavior is inherited from ``TextConverter``.
    """

    # Formats this converter declares support for.
    supported_formats = [FileFormat.MD]