markitai 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- markitai-0.3.0/.gitignore +81 -0
- markitai-0.3.0/PKG-INFO +159 -0
- markitai-0.3.0/README.md +120 -0
- markitai-0.3.0/pyproject.toml +134 -0
- markitai-0.3.0/src/markitai/__init__.py +3 -0
- markitai-0.3.0/src/markitai/batch.py +1316 -0
- markitai-0.3.0/src/markitai/cli.py +3979 -0
- markitai-0.3.0/src/markitai/config.py +602 -0
- markitai-0.3.0/src/markitai/config.schema.json +748 -0
- markitai-0.3.0/src/markitai/constants.py +222 -0
- markitai-0.3.0/src/markitai/converter/__init__.py +49 -0
- markitai-0.3.0/src/markitai/converter/_patches.py +98 -0
- markitai-0.3.0/src/markitai/converter/base.py +164 -0
- markitai-0.3.0/src/markitai/converter/image.py +181 -0
- markitai-0.3.0/src/markitai/converter/legacy.py +606 -0
- markitai-0.3.0/src/markitai/converter/office.py +526 -0
- markitai-0.3.0/src/markitai/converter/pdf.py +679 -0
- markitai-0.3.0/src/markitai/converter/text.py +63 -0
- markitai-0.3.0/src/markitai/fetch.py +1725 -0
- markitai-0.3.0/src/markitai/image.py +1335 -0
- markitai-0.3.0/src/markitai/json_order.py +550 -0
- markitai-0.3.0/src/markitai/llm.py +4339 -0
- markitai-0.3.0/src/markitai/ocr.py +347 -0
- markitai-0.3.0/src/markitai/prompts/__init__.py +159 -0
- markitai-0.3.0/src/markitai/prompts/cleaner.md +93 -0
- markitai-0.3.0/src/markitai/prompts/document_enhance.md +77 -0
- markitai-0.3.0/src/markitai/prompts/document_enhance_complete.md +65 -0
- markitai-0.3.0/src/markitai/prompts/document_process.md +60 -0
- markitai-0.3.0/src/markitai/prompts/frontmatter.md +28 -0
- markitai-0.3.0/src/markitai/prompts/image_analysis.md +21 -0
- markitai-0.3.0/src/markitai/prompts/image_caption.md +8 -0
- markitai-0.3.0/src/markitai/prompts/image_description.md +13 -0
- markitai-0.3.0/src/markitai/prompts/page_content.md +17 -0
- markitai-0.3.0/src/markitai/prompts/url_enhance.md +78 -0
- markitai-0.3.0/src/markitai/security.py +286 -0
- markitai-0.3.0/src/markitai/types.py +30 -0
- markitai-0.3.0/src/markitai/urls.py +187 -0
- markitai-0.3.0/src/markitai/utils/__init__.py +33 -0
- markitai-0.3.0/src/markitai/utils/executor.py +69 -0
- markitai-0.3.0/src/markitai/utils/mime.py +85 -0
- markitai-0.3.0/src/markitai/utils/office.py +262 -0
- markitai-0.3.0/src/markitai/utils/output.py +53 -0
- markitai-0.3.0/src/markitai/utils/paths.py +81 -0
- markitai-0.3.0/src/markitai/utils/text.py +359 -0
- markitai-0.3.0/src/markitai/workflow/__init__.py +37 -0
- markitai-0.3.0/src/markitai/workflow/core.py +760 -0
- markitai-0.3.0/src/markitai/workflow/helpers.py +509 -0
- markitai-0.3.0/src/markitai/workflow/single.py +369 -0
- markitai-0.3.0/tests/SKILL.md +346 -0
- markitai-0.3.0/tests/__init__.py +1 -0
- markitai-0.3.0/tests/conftest.py +285 -0
- markitai-0.3.0/tests/fixtures/Free_Test_Data_500KB_PPTX.pptx +0 -0
- markitai-0.3.0/tests/fixtures/candy.JPG +0 -0
- markitai-0.3.0/tests/fixtures/file-example_PDF_500_kB.pdf +0 -0
- markitai-0.3.0/tests/fixtures/file_example_XLSX_100.xlsx +0 -0
- markitai-0.3.0/tests/fixtures/sub_dir/file-sample_100kB.doc +0 -0
- markitai-0.3.0/tests/fixtures/sub_dir/file_example_PPT_250kB.ppt +0 -0
- markitai-0.3.0/tests/fixtures/sub_dir/file_example_XLS_100.xls +0 -0
- markitai-0.3.0/tests/fixtures/test.urls +5 -0
- markitai-0.3.0/tests/integration/__init__.py +1 -0
- markitai-0.3.0/tests/integration/test_cache.py +1109 -0
- markitai-0.3.0/tests/integration/test_cli.py +544 -0
- markitai-0.3.0/tests/integration/test_output_format.py +458 -0
- markitai-0.3.0/tests/integration/test_url.py +342 -0
- markitai-0.3.0/tests/unit/__init__.py +1 -0
- markitai-0.3.0/tests/unit/test_atomic.py +205 -0
- markitai-0.3.0/tests/unit/test_batch.py +415 -0
- markitai-0.3.0/tests/unit/test_cli_helpers.py +662 -0
- markitai-0.3.0/tests/unit/test_config.py +265 -0
- markitai-0.3.0/tests/unit/test_converter.py +160 -0
- markitai-0.3.0/tests/unit/test_executor.py +254 -0
- markitai-0.3.0/tests/unit/test_fetch.py +360 -0
- markitai-0.3.0/tests/unit/test_image.py +568 -0
- markitai-0.3.0/tests/unit/test_image_converter.py +185 -0
- markitai-0.3.0/tests/unit/test_json_order.py +395 -0
- markitai-0.3.0/tests/unit/test_llm.py +1029 -0
- markitai-0.3.0/tests/unit/test_llm_runtime.py +175 -0
- markitai-0.3.0/tests/unit/test_ocr.py +209 -0
- markitai-0.3.0/tests/unit/test_prompts.py +161 -0
- markitai-0.3.0/tests/unit/test_schema_sync.py +235 -0
- markitai-0.3.0/tests/unit/test_security.py +324 -0
- markitai-0.3.0/tests/unit/test_workflow_core.py +781 -0
- markitai-0.3.0/tests/unit/test_workflow_helpers.py +434 -0
- markitai-0.3.0/tests/unit/test_workflow_single.py +353 -0
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# Archive
|
|
2
|
+
.archive
|
|
3
|
+
|
|
4
|
+
# Python
|
|
5
|
+
__pycache__/
|
|
6
|
+
*.py[cod]
|
|
7
|
+
*$py.class
|
|
8
|
+
*.so
|
|
9
|
+
.Python
|
|
10
|
+
build/
|
|
11
|
+
develop-eggs/
|
|
12
|
+
dist/
|
|
13
|
+
downloads/
|
|
14
|
+
eggs/
|
|
15
|
+
.eggs/
|
|
16
|
+
lib/
|
|
17
|
+
lib64/
|
|
18
|
+
parts/
|
|
19
|
+
sdist/
|
|
20
|
+
var/
|
|
21
|
+
wheels/
|
|
22
|
+
*.egg-info/
|
|
23
|
+
.installed.cfg
|
|
24
|
+
*.egg
|
|
25
|
+
|
|
26
|
+
# Virtual environments
|
|
27
|
+
.venv/
|
|
28
|
+
venv/
|
|
29
|
+
ENV/
|
|
30
|
+
|
|
31
|
+
# IDE
|
|
32
|
+
.idea/
|
|
33
|
+
# .vscode/
|
|
34
|
+
*.swp
|
|
35
|
+
*.swo
|
|
36
|
+
*~
|
|
37
|
+
|
|
38
|
+
# Testing
|
|
39
|
+
.pytest_cache/
|
|
40
|
+
.coverage
|
|
41
|
+
htmlcov/
|
|
42
|
+
.tox/
|
|
43
|
+
.nox/
|
|
44
|
+
|
|
45
|
+
# Type checking
|
|
46
|
+
.mypy_cache/
|
|
47
|
+
.pytype/
|
|
48
|
+
|
|
49
|
+
# Markitai output
|
|
50
|
+
output/
|
|
51
|
+
output-*/
|
|
52
|
+
|
|
53
|
+
# Markitai config (user-specific)
|
|
54
|
+
markitai.json
|
|
55
|
+
|
|
56
|
+
# Logs
|
|
57
|
+
logs/
|
|
58
|
+
*.log
|
|
59
|
+
|
|
60
|
+
# Environment variables (API keys)
|
|
61
|
+
.env
|
|
62
|
+
.env.*
|
|
63
|
+
!.env.example
|
|
64
|
+
|
|
65
|
+
# OS
|
|
66
|
+
.DS_Store
|
|
67
|
+
Thumbs.db
|
|
68
|
+
|
|
69
|
+
# SQLite cache (including WAL mode files)
|
|
70
|
+
cache.db
|
|
71
|
+
cache.db-wal
|
|
72
|
+
cache.db-shm
|
|
73
|
+
*.db-wal
|
|
74
|
+
*.db-shm
|
|
75
|
+
fetch_cache.db
|
|
76
|
+
|
|
77
|
+
# VitePress (website)
|
|
78
|
+
website/node_modules/
|
|
79
|
+
website/.vitepress/cache/
|
|
80
|
+
website/.vitepress/dist/
|
|
81
|
+
website/changelog.md
|
markitai-0.3.0/PKG-INFO
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: markitai
|
|
3
|
+
Version: 0.3.0
|
|
4
|
+
Summary: Document to Markdown converter with LLM enhancement
|
|
5
|
+
Project-URL: Homepage, https://markitai.ynewtime.com
|
|
6
|
+
Project-URL: Documentation, https://markitai.ynewtime.com/guide/getting-started
|
|
7
|
+
Project-URL: Repository, https://github.com/Ynewtime/markitai
|
|
8
|
+
Project-URL: Changelog, https://github.com/Ynewtime/markitai/blob/main/CHANGELOG.md
|
|
9
|
+
Author-email: Ynewtime <longqiliuye@gmail.com>
|
|
10
|
+
License-Expression: MIT
|
|
11
|
+
Keywords: converter,docx,llm,markdown,ocr,pdf
|
|
12
|
+
Classifier: Development Status :: 4 - Beta
|
|
13
|
+
Classifier: Environment :: Console
|
|
14
|
+
Classifier: Intended Audience :: Developers
|
|
15
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
16
|
+
Classifier: Operating System :: OS Independent
|
|
17
|
+
Classifier: Programming Language :: Python :: 3
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
21
|
+
Classifier: Topic :: Text Processing :: Markup :: Markdown
|
|
22
|
+
Classifier: Topic :: Utilities
|
|
23
|
+
Requires-Python: >=3.11
|
|
24
|
+
Requires-Dist: aiofiles>=25.1.0
|
|
25
|
+
Requires-Dist: click>=8.1.0
|
|
26
|
+
Requires-Dist: instructor>=1.14.0
|
|
27
|
+
Requires-Dist: litellm>=1.80.16
|
|
28
|
+
Requires-Dist: loguru>=0.7.3
|
|
29
|
+
Requires-Dist: markitdown[all]>=0.1.4
|
|
30
|
+
Requires-Dist: pillow>=12.1.0
|
|
31
|
+
Requires-Dist: pydantic>=2.10.0
|
|
32
|
+
Requires-Dist: pymupdf4llm>=0.2.9
|
|
33
|
+
Requires-Dist: python-dotenv>=1.2.1
|
|
34
|
+
Requires-Dist: pywin32>=310; sys_platform == 'win32'
|
|
35
|
+
Requires-Dist: rapidocr>=3.5.0
|
|
36
|
+
Requires-Dist: rich>=14.2.0
|
|
37
|
+
Provides-Extra: all
|
|
38
|
+
Description-Content-Type: text/markdown
|
|
39
|
+
|
|
40
|
+
# Markitai
|
|
41
|
+
|
|
42
|
+
开箱即用的 Markdown 转换器,原生支持 LLM 增强。
|
|
43
|
+
|
|
44
|
+
## 特性
|
|
45
|
+
|
|
46
|
+
- **多格式支持** - DOCX/DOC, PPTX/PPT, XLSX/XLS, PDF, TXT, MD, JPG/PNG/WebP, URLs
|
|
47
|
+
- **LLM 增强** - 格式清洗、元数据生成、图片分析
|
|
48
|
+
- **批量处理** - 并发转换、断点恢复、进度显示
|
|
49
|
+
- **OCR 识别** - 扫描版 PDF 和图片文字提取
|
|
50
|
+
- **URL 转换** - 直接转换网页,支持 SPA 浏览器渲染
|
|
51
|
+
|
|
52
|
+
## 安装
|
|
53
|
+
|
|
54
|
+
### 一键安装(推荐)
|
|
55
|
+
|
|
56
|
+
```bash
|
|
57
|
+
# Linux/macOS
|
|
58
|
+
curl -fsSL https://raw.githubusercontent.com/Ynewtime/markitai/main/scripts/setup.sh | sh
|
|
59
|
+
|
|
60
|
+
# Windows (PowerShell)
|
|
61
|
+
irm https://raw.githubusercontent.com/Ynewtime/markitai/main/scripts/setup.ps1 | iex
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
### 手动安装
|
|
65
|
+
|
|
66
|
+
```bash
|
|
67
|
+
# 需要 Python 3.11+
|
|
68
|
+
uv tool install markitai
|
|
69
|
+
|
|
70
|
+
# 或使用 pip
|
|
71
|
+
pip install --user markitai
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
## 快速开始
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
# 基础转换
|
|
78
|
+
markitai document.docx
|
|
79
|
+
|
|
80
|
+
# URL 转换
|
|
81
|
+
markitai https://example.com/article
|
|
82
|
+
|
|
83
|
+
# LLM 增强
|
|
84
|
+
markitai document.docx --llm
|
|
85
|
+
|
|
86
|
+
# 使用预设
|
|
87
|
+
markitai document.pdf --preset rich # LLM + alt + desc + screenshot
|
|
88
|
+
markitai document.pdf --preset standard # LLM + alt + desc
|
|
89
|
+
markitai document.pdf --preset minimal # 仅基础转换
|
|
90
|
+
|
|
91
|
+
# 批量处理
|
|
92
|
+
markitai ./docs -o ./output
|
|
93
|
+
|
|
94
|
+
# 断点恢复
|
|
95
|
+
markitai ./docs -o ./output --resume
|
|
96
|
+
|
|
97
|
+
# URL 批量处理(自动识别 .urls 文件)
|
|
98
|
+
markitai urls.urls -o ./output
|
|
99
|
+
```
|
|
100
|
+
|
|
101
|
+
## 输出结构
|
|
102
|
+
|
|
103
|
+
```
|
|
104
|
+
output/
|
|
105
|
+
├── document.docx.md # 基础 Markdown
|
|
106
|
+
├── document.docx.llm.md # LLM 优化版
|
|
107
|
+
├── assets/
|
|
108
|
+
│ ├── document.docx.0001.jpg
|
|
109
|
+
│ └── images.json # 图片描述
|
|
110
|
+
└── screenshots/ # 页面截图(--screenshot 时)
|
|
111
|
+
    └── example_com.full.jpg
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
## 配置
|
|
115
|
+
|
|
116
|
+
优先级:命令行 > 环境变量 > 配置文件 > 默认值
|
|
117
|
+
|
|
118
|
+
```bash
|
|
119
|
+
# 查看配置
|
|
120
|
+
markitai config list
|
|
121
|
+
|
|
122
|
+
# 初始化配置文件
|
|
123
|
+
markitai config init -o .
|
|
124
|
+
|
|
125
|
+
# 查看缓存状态
|
|
126
|
+
markitai cache stats
|
|
127
|
+
|
|
128
|
+
# 清理缓存
|
|
129
|
+
markitai cache clear
|
|
130
|
+
```
|
|
131
|
+
|
|
132
|
+
配置文件路径:`./markitai.json` 或 `~/.markitai/config.json`
|
|
133
|
+
|
|
134
|
+
## 环境变量
|
|
135
|
+
|
|
136
|
+
| 变量 | 说明 |
|
|
137
|
+
|------|------|
|
|
138
|
+
| `OPENAI_API_KEY` | OpenAI API Key |
|
|
139
|
+
| `GEMINI_API_KEY` | Google Gemini API Key |
|
|
140
|
+
| `DEEPSEEK_API_KEY` | DeepSeek API Key |
|
|
141
|
+
| `ANTHROPIC_API_KEY` | Anthropic API Key |
|
|
142
|
+
| `JINA_API_KEY` | Jina Reader API Key(URL 转换) |
|
|
143
|
+
|
|
144
|
+
## 依赖
|
|
145
|
+
|
|
146
|
+
- [pymupdf4llm](https://github.com/pymupdf/RAG) - PDF 转换
|
|
147
|
+
- [markitdown](https://github.com/microsoft/markitdown) - Office 文档和 URL 转换
|
|
148
|
+
- [LiteLLM](https://github.com/BerriAI/litellm) - LLM 网关
|
|
149
|
+
- [RapidOCR](https://github.com/RapidAI/RapidOCR) - OCR 识别
|
|
150
|
+
|
|
151
|
+
## 文档
|
|
152
|
+
|
|
153
|
+
- [快速开始](https://ynewtime.github.io/markitai/guide/getting-started)
|
|
154
|
+
- [配置说明](https://ynewtime.github.io/markitai/guide/configuration)
|
|
155
|
+
- [CLI 命令参考](https://ynewtime.github.io/markitai/guide/cli)
|
|
156
|
+
|
|
157
|
+
## License
|
|
158
|
+
|
|
159
|
+
MIT
|
markitai-0.3.0/README.md
ADDED
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
# Markitai
|
|
2
|
+
|
|
3
|
+
开箱即用的 Markdown 转换器,原生支持 LLM 增强。
|
|
4
|
+
|
|
5
|
+
## 特性
|
|
6
|
+
|
|
7
|
+
- **多格式支持** - DOCX/DOC, PPTX/PPT, XLSX/XLS, PDF, TXT, MD, JPG/PNG/WebP, URLs
|
|
8
|
+
- **LLM 增强** - 格式清洗、元数据生成、图片分析
|
|
9
|
+
- **批量处理** - 并发转换、断点恢复、进度显示
|
|
10
|
+
- **OCR 识别** - 扫描版 PDF 和图片文字提取
|
|
11
|
+
- **URL 转换** - 直接转换网页,支持 SPA 浏览器渲染
|
|
12
|
+
|
|
13
|
+
## 安装
|
|
14
|
+
|
|
15
|
+
### 一键安装(推荐)
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
# Linux/macOS
|
|
19
|
+
curl -fsSL https://raw.githubusercontent.com/Ynewtime/markitai/main/scripts/setup.sh | sh
|
|
20
|
+
|
|
21
|
+
# Windows (PowerShell)
|
|
22
|
+
irm https://raw.githubusercontent.com/Ynewtime/markitai/main/scripts/setup.ps1 | iex
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
### 手动安装
|
|
26
|
+
|
|
27
|
+
```bash
|
|
28
|
+
# 需要 Python 3.11+
|
|
29
|
+
uv tool install markitai
|
|
30
|
+
|
|
31
|
+
# 或使用 pip
|
|
32
|
+
pip install --user markitai
|
|
33
|
+
```
|
|
34
|
+
|
|
35
|
+
## 快速开始
|
|
36
|
+
|
|
37
|
+
```bash
|
|
38
|
+
# 基础转换
|
|
39
|
+
markitai document.docx
|
|
40
|
+
|
|
41
|
+
# URL 转换
|
|
42
|
+
markitai https://example.com/article
|
|
43
|
+
|
|
44
|
+
# LLM 增强
|
|
45
|
+
markitai document.docx --llm
|
|
46
|
+
|
|
47
|
+
# 使用预设
|
|
48
|
+
markitai document.pdf --preset rich # LLM + alt + desc + screenshot
|
|
49
|
+
markitai document.pdf --preset standard # LLM + alt + desc
|
|
50
|
+
markitai document.pdf --preset minimal # 仅基础转换
|
|
51
|
+
|
|
52
|
+
# 批量处理
|
|
53
|
+
markitai ./docs -o ./output
|
|
54
|
+
|
|
55
|
+
# 断点恢复
|
|
56
|
+
markitai ./docs -o ./output --resume
|
|
57
|
+
|
|
58
|
+
# URL 批量处理(自动识别 .urls 文件)
|
|
59
|
+
markitai urls.urls -o ./output
|
|
60
|
+
```
|
|
61
|
+
|
|
62
|
+
## 输出结构
|
|
63
|
+
|
|
64
|
+
```
|
|
65
|
+
output/
|
|
66
|
+
├── document.docx.md # 基础 Markdown
|
|
67
|
+
├── document.docx.llm.md # LLM 优化版
|
|
68
|
+
├── assets/
|
|
69
|
+
│ ├── document.docx.0001.jpg
|
|
70
|
+
│ └── images.json # 图片描述
|
|
71
|
+
└── screenshots/ # 页面截图(--screenshot 时)
|
|
72
|
+
    └── example_com.full.jpg
|
|
73
|
+
```
|
|
74
|
+
|
|
75
|
+
## 配置
|
|
76
|
+
|
|
77
|
+
优先级:命令行 > 环境变量 > 配置文件 > 默认值
|
|
78
|
+
|
|
79
|
+
```bash
|
|
80
|
+
# 查看配置
|
|
81
|
+
markitai config list
|
|
82
|
+
|
|
83
|
+
# 初始化配置文件
|
|
84
|
+
markitai config init -o .
|
|
85
|
+
|
|
86
|
+
# 查看缓存状态
|
|
87
|
+
markitai cache stats
|
|
88
|
+
|
|
89
|
+
# 清理缓存
|
|
90
|
+
markitai cache clear
|
|
91
|
+
```
|
|
92
|
+
|
|
93
|
+
配置文件路径:`./markitai.json` 或 `~/.markitai/config.json`
|
|
94
|
+
|
|
95
|
+
## 环境变量
|
|
96
|
+
|
|
97
|
+
| 变量 | 说明 |
|
|
98
|
+
|------|------|
|
|
99
|
+
| `OPENAI_API_KEY` | OpenAI API Key |
|
|
100
|
+
| `GEMINI_API_KEY` | Google Gemini API Key |
|
|
101
|
+
| `DEEPSEEK_API_KEY` | DeepSeek API Key |
|
|
102
|
+
| `ANTHROPIC_API_KEY` | Anthropic API Key |
|
|
103
|
+
| `JINA_API_KEY` | Jina Reader API Key(URL 转换) |
|
|
104
|
+
|
|
105
|
+
## 依赖
|
|
106
|
+
|
|
107
|
+
- [pymupdf4llm](https://github.com/pymupdf/RAG) - PDF 转换
|
|
108
|
+
- [markitdown](https://github.com/microsoft/markitdown) - Office 文档和 URL 转换
|
|
109
|
+
- [LiteLLM](https://github.com/BerriAI/litellm) - LLM 网关
|
|
110
|
+
- [RapidOCR](https://github.com/RapidAI/RapidOCR) - OCR 识别
|
|
111
|
+
|
|
112
|
+
## 文档
|
|
113
|
+
|
|
114
|
+
- [快速开始](https://ynewtime.github.io/markitai/guide/getting-started)
|
|
115
|
+
- [配置说明](https://ynewtime.github.io/markitai/guide/configuration)
|
|
116
|
+
- [CLI 命令参考](https://ynewtime.github.io/markitai/guide/cli)
|
|
117
|
+
|
|
118
|
+
## License
|
|
119
|
+
|
|
120
|
+
MIT
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
[project]
|
|
2
|
+
name = "markitai"
|
|
3
|
+
version = "0.3.0"
|
|
4
|
+
description = "Document to Markdown converter with LLM enhancement"
|
|
5
|
+
license = "MIT"
|
|
6
|
+
readme = "README.md"
|
|
7
|
+
requires-python = ">=3.11"
|
|
8
|
+
authors = [
|
|
9
|
+
{ name = "Ynewtime", email = "longqiliuye@gmail.com" }
|
|
10
|
+
]
|
|
11
|
+
keywords = ["markdown", "converter", "llm", "pdf", "docx", "ocr"]
|
|
12
|
+
classifiers = [
|
|
13
|
+
"Development Status :: 4 - Beta",
|
|
14
|
+
"Environment :: Console",
|
|
15
|
+
"Intended Audience :: Developers",
|
|
16
|
+
"License :: OSI Approved :: MIT License",
|
|
17
|
+
"Operating System :: OS Independent",
|
|
18
|
+
"Programming Language :: Python :: 3",
|
|
19
|
+
"Programming Language :: Python :: 3.11",
|
|
20
|
+
"Programming Language :: Python :: 3.12",
|
|
21
|
+
"Programming Language :: Python :: 3.13",
|
|
22
|
+
"Topic :: Text Processing :: Markup :: Markdown",
|
|
23
|
+
"Topic :: Utilities",
|
|
24
|
+
]
|
|
25
|
+
dependencies = [
|
|
26
|
+
"pymupdf4llm>=0.2.9",
|
|
27
|
+
"markitdown[all]>=0.1.4",
|
|
28
|
+
"litellm>=1.80.16",
|
|
29
|
+
"instructor>=1.14.0",
|
|
30
|
+
"rapidocr>=3.5.0",
|
|
31
|
+
"click>=8.1.0",
|
|
32
|
+
"loguru>=0.7.3",
|
|
33
|
+
"rich>=14.2.0",
|
|
34
|
+
"Pillow>=12.1.0",
|
|
35
|
+
"aiofiles>=25.1.0",
|
|
36
|
+
"pydantic>=2.10.0",
|
|
37
|
+
"python-dotenv>=1.2.1",
|
|
38
|
+
"pywin32>=310; sys_platform == 'win32'",
|
|
39
|
+
]
|
|
40
|
+
|
|
41
|
+
[project.urls]
|
|
42
|
+
Homepage = "https://markitai.ynewtime.com"
|
|
43
|
+
Documentation = "https://markitai.ynewtime.com/guide/getting-started"
|
|
44
|
+
Repository = "https://github.com/Ynewtime/markitai"
|
|
45
|
+
Changelog = "https://github.com/Ynewtime/markitai/blob/main/CHANGELOG.md"
|
|
46
|
+
|
|
47
|
+
[project.scripts]
|
|
48
|
+
markitai = "markitai.cli:app"
|
|
49
|
+
|
|
50
|
+
[project.optional-dependencies]
|
|
51
|
+
all = []
|
|
52
|
+
|
|
53
|
+
[dependency-groups]
|
|
54
|
+
dev = [
|
|
55
|
+
"pytest>=8.3.0",
|
|
56
|
+
"pytest-xdist>=3.5.0",
|
|
57
|
+
"pytest-asyncio>=0.25.0",
|
|
58
|
+
"ruff>=0.9.0",
|
|
59
|
+
"pyright>=1.1.400",
|
|
60
|
+
"lxml-stubs>=0.5.1",
|
|
61
|
+
]
|
|
62
|
+
|
|
63
|
+
[build-system]
|
|
64
|
+
requires = ["hatchling"]
|
|
65
|
+
build-backend = "hatchling.build"
|
|
66
|
+
|
|
67
|
+
[tool.hatch.build.targets.wheel]
|
|
68
|
+
packages = ["src/markitai"]
|
|
69
|
+
|
|
70
|
+
[tool.pytest.ini_options]
|
|
71
|
+
testpaths = ["tests"]
|
|
72
|
+
asyncio_mode = "auto"
|
|
73
|
+
asyncio_default_fixture_loop_scope = "function"
|
|
74
|
+
|
|
75
|
+
[tool.ruff]
|
|
76
|
+
target-version = "py311"
|
|
77
|
+
line-length = 88
|
|
78
|
+
src = ["src", "tests"]
|
|
79
|
+
|
|
80
|
+
[tool.ruff.lint]
|
|
81
|
+
select = [
|
|
82
|
+
"E", # pycodestyle errors
|
|
83
|
+
"W", # pycodestyle warnings
|
|
84
|
+
"F", # Pyflakes
|
|
85
|
+
"I", # isort
|
|
86
|
+
"B", # flake8-bugbear
|
|
87
|
+
"C4", # flake8-comprehensions
|
|
88
|
+
"UP", # pyupgrade
|
|
89
|
+
"ARG", # flake8-unused-arguments
|
|
90
|
+
"SIM", # flake8-simplify
|
|
91
|
+
]
|
|
92
|
+
ignore = [
|
|
93
|
+
"E501", # line too long (handled by formatter)
|
|
94
|
+
"E402", # module level import not at top (needed for env setup before imports)
|
|
95
|
+
"B008", # do not perform function calls in argument defaults
|
|
96
|
+
"B904", # raise from in except (too verbose for simple re-raises)
|
|
97
|
+
"B905", # zip without explicit strict
|
|
98
|
+
"ARG001", # unused function argument (click callback signatures)
|
|
99
|
+
"ARG002", # unused method argument (callback signatures)
|
|
100
|
+
"SIM102", # nested if (explicit nesting is clearer for complex conditions)
|
|
101
|
+
"SIM105", # contextlib.suppress (explicit try-except is clearer)
|
|
102
|
+
"SIM108", # use ternary operator instead of if-else
|
|
103
|
+
"SIM103", # return condition directly (explicit return is clearer)
|
|
104
|
+
"UP047", # use type parameters (PEP 695) - TypeVar is clearer for compatibility
|
|
105
|
+
]
|
|
106
|
+
|
|
107
|
+
[tool.ruff.lint.isort]
|
|
108
|
+
known-first-party = ["markitai"]
|
|
109
|
+
|
|
110
|
+
[tool.ruff.format]
|
|
111
|
+
quote-style = "double"
|
|
112
|
+
indent-style = "space"
|
|
113
|
+
skip-magic-trailing-comma = false
|
|
114
|
+
line-ending = "auto"
|
|
115
|
+
|
|
116
|
+
[tool.pyright]
|
|
117
|
+
pythonVersion = "3.11"
|
|
118
|
+
typeCheckingMode = "basic"
|
|
119
|
+
include = ["src"]
|
|
120
|
+
exclude = ["tests", "**/__pycache__"]
|
|
121
|
+
venvPath = "../.."
|
|
122
|
+
venv = ".venv"
|
|
123
|
+
reportMissingImports = true
|
|
124
|
+
reportMissingTypeStubs = false
|
|
125
|
+
reportUnusedImport = true
|
|
126
|
+
reportUnusedVariable = "warning"
|
|
127
|
+
# Third-party libraries with incomplete type stubs
|
|
128
|
+
reportPrivateImportUsage = "warning"
|
|
129
|
+
reportAttributeAccessIssue = "warning"
|
|
130
|
+
reportArgumentType = "warning"
|
|
131
|
+
reportCallIssue = "warning"
|
|
132
|
+
reportReturnType = "warning"
|
|
133
|
+
# Click decorator patterns (e.g., @app.group())
|
|
134
|
+
reportFunctionMemberAccess = "warning"
|