markitai 0.3.1__tar.gz → 0.4.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {markitai-0.3.1 → markitai-0.4.1}/.gitignore +7 -7
- {markitai-0.3.1 → markitai-0.4.1}/PKG-INFO +41 -6
- {markitai-0.3.1 → markitai-0.4.1}/README.md +29 -3
- {markitai-0.3.1 → markitai-0.4.1}/pyproject.toml +16 -7
- markitai-0.4.1/src/markitai/__init__.py +3 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/batch.py +41 -17
- markitai-0.4.1/src/markitai/cli/__init__.py +52 -0
- markitai-0.4.1/src/markitai/cli/commands/__init__.py +18 -0
- markitai-0.4.1/src/markitai/cli/commands/cache.py +292 -0
- markitai-0.4.1/src/markitai/cli/commands/config.py +240 -0
- markitai-0.4.1/src/markitai/cli/commands/doctor.py +561 -0
- markitai-0.4.1/src/markitai/cli/console.py +50 -0
- markitai-0.4.1/src/markitai/cli/framework.py +130 -0
- markitai-0.4.1/src/markitai/cli/logging_config.py +377 -0
- markitai-0.4.1/src/markitai/cli/main.py +1036 -0
- markitai-0.4.1/src/markitai/cli/processors/__init__.py +47 -0
- markitai-0.4.1/src/markitai/cli/processors/batch.py +877 -0
- markitai-0.4.1/src/markitai/cli/processors/file.py +226 -0
- markitai-0.4.1/src/markitai/cli/processors/llm.py +383 -0
- markitai-0.4.1/src/markitai/cli/processors/url.py +1050 -0
- markitai-0.4.1/src/markitai/cli/processors/validators.py +265 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/config.py +85 -27
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/config.schema.json +23 -35
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/constants.py +51 -9
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/converter/pdf.py +2 -2
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/fetch.py +606 -887
- markitai-0.4.1/src/markitai/fetch_playwright.py +482 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/image.py +45 -5
- markitai-0.4.1/src/markitai/llm/__init__.py +100 -0
- markitai-0.4.1/src/markitai/llm/cache.py +521 -0
- markitai-0.4.1/src/markitai/llm/content.py +632 -0
- markitai-0.4.1/src/markitai/llm/document.py +1525 -0
- markitai-0.4.1/src/markitai/llm/models.py +205 -0
- markitai-0.4.1/src/markitai/llm/processor.py +2361 -0
- markitai-0.4.1/src/markitai/llm/types.py +201 -0
- markitai-0.4.1/src/markitai/llm/vision.py +866 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/ocr.py +2 -3
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/__init__.py +6 -3
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/cleaner_system.md +10 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/document_enhance_complete_system.md +40 -6
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/document_enhance_system.md +32 -1
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/document_process_system.md +19 -2
- markitai-0.4.1/src/markitai/prompts/document_vision_system.md +117 -0
- markitai-0.4.1/src/markitai/prompts/document_vision_user.md +5 -0
- markitai-0.4.1/src/markitai/prompts/screenshot_extract_system.md +76 -0
- markitai-0.4.1/src/markitai/prompts/screenshot_extract_user.md +1 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/url_enhance_system.md +30 -8
- markitai-0.4.1/src/markitai/providers/__init__.py +695 -0
- markitai-0.4.1/src/markitai/providers/auth.py +351 -0
- markitai-0.4.1/src/markitai/providers/claude_agent.py +649 -0
- markitai-0.4.1/src/markitai/providers/copilot.py +844 -0
- markitai-0.4.1/src/markitai/providers/errors.py +225 -0
- markitai-0.4.1/src/markitai/providers/json_mode.py +217 -0
- markitai-0.4.1/src/markitai/providers/timeout.py +169 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/security.py +88 -6
- markitai-0.4.1/src/markitai/utils/__init__.py +69 -0
- markitai-0.4.1/src/markitai/utils/cli_helpers.py +171 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/utils/executor.py +13 -0
- markitai-0.4.1/src/markitai/utils/frontmatter.py +315 -0
- markitai-0.4.1/src/markitai/utils/progress.py +92 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/utils/text.py +110 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/workflow/core.py +68 -36
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/workflow/helpers.py +46 -16
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/workflow/single.py +91 -43
- {markitai-0.3.1 → markitai-0.4.1}/tests/integration/test_cache.py +29 -232
- {markitai-0.3.1 → markitai-0.4.1}/tests/integration/test_cli.py +1 -1
- markitai-0.4.1/tests/integration/test_cli_full.py +914 -0
- markitai-0.4.1/tests/integration/test_local_providers.py +855 -0
- {markitai-0.3.1 → markitai-0.4.1}/tests/integration/test_output_format.py +5 -4
- markitai-0.4.1/tests/integration/test_real_scenarios.py +379 -0
- markitai-0.4.1/tests/unit/test_batch_processor.py +1368 -0
- markitai-0.4.1/tests/unit/test_cache_cli.py +491 -0
- {markitai-0.3.1 → markitai-0.4.1}/tests/unit/test_cli_helpers.py +13 -5
- markitai-0.4.1/tests/unit/test_cli_main.py +867 -0
- markitai-0.4.1/tests/unit/test_config_cli.py +282 -0
- markitai-0.4.1/tests/unit/test_converter_pdf.py +889 -0
- markitai-0.4.1/tests/unit/test_deps_cli.py +742 -0
- markitai-0.4.1/tests/unit/test_doctor_cli.py +366 -0
- markitai-0.4.1/tests/unit/test_document_utils.py +1731 -0
- {markitai-0.3.1 → markitai-0.4.1}/tests/unit/test_executor.py +2 -2
- markitai-0.4.1/tests/unit/test_fetch.py +3230 -0
- markitai-0.4.1/tests/unit/test_fetch_playwright.py +1145 -0
- markitai-0.4.1/tests/unit/test_frontmatter.py +422 -0
- markitai-0.4.1/tests/unit/test_image.py +2453 -0
- {markitai-0.3.1 → markitai-0.4.1}/tests/unit/test_llm.py +63 -79
- markitai-0.4.1/tests/unit/test_llm_content.py +266 -0
- markitai-0.4.1/tests/unit/test_llm_models.py +545 -0
- markitai-0.4.1/tests/unit/test_llm_processor.py +1337 -0
- markitai-0.4.1/tests/unit/test_llm_processor_cli.py +1172 -0
- {markitai-0.3.1 → markitai-0.4.1}/tests/unit/test_prompts.py +1 -30
- markitai-0.4.1/tests/unit/test_provider_auth.py +512 -0
- markitai-0.4.1/tests/unit/test_provider_errors.py +307 -0
- markitai-0.4.1/tests/unit/test_provider_json_mode.py +236 -0
- markitai-0.4.1/tests/unit/test_provider_timeout.py +302 -0
- markitai-0.4.1/tests/unit/test_providers.py +1274 -0
- markitai-0.4.1/tests/unit/test_security.py +873 -0
- markitai-0.4.1/tests/unit/test_url_processor.py +878 -0
- markitai-0.4.1/tests/unit/test_utils_text.py +248 -0
- markitai-0.4.1/tests/unit/test_vision_mixin.py +1493 -0
- markitai-0.4.1/tests/unit/test_vision_utils.py +54 -0
- {markitai-0.3.1 → markitai-0.4.1}/tests/unit/test_workflow_core.py +757 -0
- {markitai-0.3.1 → markitai-0.4.1}/tests/unit/test_workflow_helpers.py +278 -0
- markitai-0.4.1/tests/unit/test_workflow_single.py +711 -0
- markitai-0.3.1/src/markitai/__init__.py +0 -3
- markitai-0.3.1/src/markitai/cli.py +0 -4081
- markitai-0.3.1/src/markitai/llm.py +0 -4474
- markitai-0.3.1/src/markitai/prompts/frontmatter_system.md +0 -24
- markitai-0.3.1/src/markitai/prompts/frontmatter_user.md +0 -5
- markitai-0.3.1/src/markitai/utils/__init__.py +0 -33
- markitai-0.3.1/tests/unit/test_fetch.py +0 -789
- markitai-0.3.1/tests/unit/test_image.py +0 -781
- markitai-0.3.1/tests/unit/test_security.py +0 -324
- markitai-0.3.1/tests/unit/test_workflow_single.py +0 -353
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/converter/__init__.py +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/converter/_patches.py +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/converter/base.py +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/converter/image.py +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/converter/legacy.py +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/converter/office.py +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/converter/text.py +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/json_order.py +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/cleaner_user.md +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/document_enhance_complete_user.md +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/document_enhance_user.md +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/document_process_user.md +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/image_analysis_system.md +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/image_analysis_user.md +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/image_caption_system.md +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/image_caption_user.md +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/image_description_system.md +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/image_description_user.md +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/page_content_system.md +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/page_content_user.md +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/url_enhance_user.md +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/types.py +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/urls.py +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/utils/mime.py +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/utils/office.py +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/utils/output.py +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/utils/paths.py +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/src/markitai/workflow/__init__.py +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/tests/SKILL.md +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/tests/__init__.py +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/tests/conftest.py +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/tests/fixtures/Free_Test_Data_500KB_PPTX.pptx +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/tests/fixtures/candy.JPG +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/tests/fixtures/file-example_PDF_500_kB.pdf +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/tests/fixtures/file_example_XLSX_100.xlsx +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/tests/fixtures/sub_dir/file-sample_100kB.doc +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/tests/fixtures/sub_dir/file_example_PPT_250kB.ppt +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/tests/fixtures/sub_dir/file_example_XLS_100.xls +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/tests/fixtures/test.urls +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/tests/integration/__init__.py +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/tests/integration/test_url.py +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/tests/unit/__init__.py +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/tests/unit/test_atomic.py +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/tests/unit/test_batch.py +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/tests/unit/test_config.py +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/tests/unit/test_converter.py +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/tests/unit/test_image_converter.py +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/tests/unit/test_json_order.py +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/tests/unit/test_llm_runtime.py +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/tests/unit/test_ocr.py +0 -0
- {markitai-0.3.1 → markitai-0.4.1}/tests/unit/test_schema_sync.py +0 -0
|
@@ -38,6 +38,7 @@ ENV/
|
|
|
38
38
|
# Testing
|
|
39
39
|
.pytest_cache/
|
|
40
40
|
.coverage
|
|
41
|
+
coverage.xml
|
|
41
42
|
htmlcov/
|
|
42
43
|
.tox/
|
|
43
44
|
.nox/
|
|
@@ -46,6 +47,9 @@ htmlcov/
|
|
|
46
47
|
.mypy_cache/
|
|
47
48
|
.pytype/
|
|
48
49
|
|
|
50
|
+
# Linting
|
|
51
|
+
.ruff_cache/
|
|
52
|
+
|
|
49
53
|
# Markitai output
|
|
50
54
|
output/
|
|
51
55
|
output-*/
|
|
@@ -55,6 +59,7 @@ markitai.json
|
|
|
55
59
|
|
|
56
60
|
# Logs
|
|
57
61
|
logs/
|
|
62
|
+
logs_*/
|
|
58
63
|
*.log
|
|
59
64
|
|
|
60
65
|
# Environment variables (API keys)
|
|
@@ -66,13 +71,8 @@ logs/
|
|
|
66
71
|
.DS_Store
|
|
67
72
|
Thumbs.db
|
|
68
73
|
|
|
69
|
-
#
|
|
70
|
-
|
|
71
|
-
cache.db-wal
|
|
72
|
-
cache.db-shm
|
|
73
|
-
*.db-wal
|
|
74
|
-
*.db-shm
|
|
75
|
-
fetch_cache.db
|
|
74
|
+
# Markitai cache directory
|
|
75
|
+
.markitai/
|
|
76
76
|
|
|
77
77
|
# VitePress (website)
|
|
78
78
|
website/node_modules/
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: markitai
|
|
3
|
-
Version: 0.
|
|
4
|
-
Summary:
|
|
3
|
+
Version: 0.4.1
|
|
4
|
+
Summary: Opinionated Markdown converter with native LLM enhancement support
|
|
5
5
|
Project-URL: Homepage, https://markitai.ynewtime.com
|
|
6
6
|
Project-URL: Documentation, https://markitai.ynewtime.com/guide/getting-started
|
|
7
7
|
Project-URL: Repository, https://github.com/Ynewtime/markitai
|
|
@@ -20,7 +20,7 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
20
20
|
Classifier: Programming Language :: Python :: 3.13
|
|
21
21
|
Classifier: Topic :: Text Processing :: Markup :: Markdown
|
|
22
22
|
Classifier: Topic :: Utilities
|
|
23
|
-
Requires-Python:
|
|
23
|
+
Requires-Python: <3.14,>=3.11
|
|
24
24
|
Requires-Dist: aiofiles>=25.1.0
|
|
25
25
|
Requires-Dist: click>=8.1.0
|
|
26
26
|
Requires-Dist: instructor>=1.14.0
|
|
@@ -36,10 +36,21 @@ Requires-Dist: pywin32>=310; sys_platform == 'win32'
|
|
|
36
36
|
Requires-Dist: rapidocr>=3.5.0
|
|
37
37
|
Requires-Dist: rich>=14.2.0
|
|
38
38
|
Provides-Extra: all
|
|
39
|
+
Requires-Dist: claude-agent-sdk>=0.1.0; extra == 'all'
|
|
40
|
+
Requires-Dist: github-copilot-sdk>=0.1.0; extra == 'all'
|
|
41
|
+
Requires-Dist: playwright>=1.50.0; extra == 'all'
|
|
42
|
+
Provides-Extra: browser
|
|
43
|
+
Requires-Dist: playwright>=1.50.0; extra == 'browser'
|
|
44
|
+
Provides-Extra: claude-agent
|
|
45
|
+
Requires-Dist: claude-agent-sdk>=0.1.0; extra == 'claude-agent'
|
|
46
|
+
Provides-Extra: copilot
|
|
47
|
+
Requires-Dist: github-copilot-sdk>=0.1.0; extra == 'copilot'
|
|
39
48
|
Description-Content-Type: text/markdown
|
|
40
49
|
|
|
41
50
|
# Markitai
|
|
42
51
|
|
|
52
|
+
English | [简体中文](./README_ZH.md)
|
|
53
|
+
|
|
43
54
|
Opinionated Markdown converter with native LLM enhancement support.
|
|
44
55
|
|
|
45
56
|
## Features
|
|
@@ -66,11 +77,11 @@ irm https://raw.githubusercontent.com/Ynewtime/markitai/main/scripts/setup.ps1 |
|
|
|
66
77
|
### Manual Installation
|
|
67
78
|
|
|
68
79
|
```bash
|
|
69
|
-
# Requires Python 3.11
|
|
80
|
+
# Requires Python 3.11-3.13 (3.14 not yet supported)
|
|
70
81
|
uv tool install markitai
|
|
71
82
|
|
|
72
|
-
# Or using pip
|
|
73
|
-
pip install
|
|
83
|
+
# Or using uv pip (for virtual environment)
|
|
84
|
+
uv pip install markitai
|
|
74
85
|
```
|
|
75
86
|
|
|
76
87
|
## Quick Start
|
|
@@ -129,10 +140,34 @@ markitai cache stats
|
|
|
129
140
|
|
|
130
141
|
# Clear cache
|
|
131
142
|
markitai cache clear
|
|
143
|
+
|
|
144
|
+
# Check system health and dependencies
|
|
145
|
+
markitai doctor
|
|
132
146
|
```
|
|
133
147
|
|
|
134
148
|
Config file location: `./markitai.json` or `~/.markitai/config.json`
|
|
135
149
|
|
|
150
|
+
### Local Providers (Subscription-based)
|
|
151
|
+
|
|
152
|
+
Use your existing Claude Code or GitHub Copilot subscription:
|
|
153
|
+
|
|
154
|
+
```bash
|
|
155
|
+
# Claude Agent (requires Claude Code CLI)
|
|
156
|
+
markitai document.pdf --llm # Configure claude-agent/sonnet in config
|
|
157
|
+
|
|
158
|
+
# GitHub Copilot (requires Copilot CLI)
|
|
159
|
+
markitai document.pdf --llm # Configure copilot/gpt-5.2 in config
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
Install CLI tools:
|
|
163
|
+
```bash
|
|
164
|
+
# Claude Code CLI
|
|
165
|
+
curl -fsSL https://claude.ai/install.sh | bash
|
|
166
|
+
|
|
167
|
+
# GitHub Copilot CLI
|
|
168
|
+
curl -fsSL https://gh.io/copilot-install | bash
|
|
169
|
+
```
|
|
170
|
+
|
|
136
171
|
## Environment Variables
|
|
137
172
|
|
|
138
173
|
| Variable | Description |
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
# Markitai
|
|
2
2
|
|
|
3
|
+
English | [简体中文](./README_ZH.md)
|
|
4
|
+
|
|
3
5
|
Opinionated Markdown converter with native LLM enhancement support.
|
|
4
6
|
|
|
5
7
|
## Features
|
|
@@ -26,11 +28,11 @@ irm https://raw.githubusercontent.com/Ynewtime/markitai/main/scripts/setup.ps1 |
|
|
|
26
28
|
### Manual Installation
|
|
27
29
|
|
|
28
30
|
```bash
|
|
29
|
-
# Requires Python 3.11
|
|
31
|
+
# Requires Python 3.11-3.13 (3.14 not yet supported)
|
|
30
32
|
uv tool install markitai
|
|
31
33
|
|
|
32
|
-
# Or using pip
|
|
33
|
-
pip install
|
|
34
|
+
# Or using uv pip (for virtual environment)
|
|
35
|
+
uv pip install markitai
|
|
34
36
|
```
|
|
35
37
|
|
|
36
38
|
## Quick Start
|
|
@@ -89,10 +91,34 @@ markitai cache stats
|
|
|
89
91
|
|
|
90
92
|
# Clear cache
|
|
91
93
|
markitai cache clear
|
|
94
|
+
|
|
95
|
+
# Check system health and dependencies
|
|
96
|
+
markitai doctor
|
|
92
97
|
```
|
|
93
98
|
|
|
94
99
|
Config file location: `./markitai.json` or `~/.markitai/config.json`
|
|
95
100
|
|
|
101
|
+
### Local Providers (Subscription-based)
|
|
102
|
+
|
|
103
|
+
Use your existing Claude Code or GitHub Copilot subscription:
|
|
104
|
+
|
|
105
|
+
```bash
|
|
106
|
+
# Claude Agent (requires Claude Code CLI)
|
|
107
|
+
markitai document.pdf --llm # Configure claude-agent/sonnet in config
|
|
108
|
+
|
|
109
|
+
# GitHub Copilot (requires Copilot CLI)
|
|
110
|
+
markitai document.pdf --llm # Configure copilot/gpt-5.2 in config
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Install CLI tools:
|
|
114
|
+
```bash
|
|
115
|
+
# Claude Code CLI
|
|
116
|
+
curl -fsSL https://claude.ai/install.sh | bash
|
|
117
|
+
|
|
118
|
+
# GitHub Copilot CLI
|
|
119
|
+
curl -fsSL https://gh.io/copilot-install | bash
|
|
120
|
+
```
|
|
121
|
+
|
|
96
122
|
## Environment Variables
|
|
97
123
|
|
|
98
124
|
| Variable | Description |
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "markitai"
|
|
3
|
-
version = "0.
|
|
4
|
-
description = "
|
|
3
|
+
version = "0.4.1"
|
|
4
|
+
description = "Opinionated Markdown converter with native LLM enhancement support"
|
|
5
5
|
license = "MIT"
|
|
6
6
|
readme = "README.md"
|
|
7
|
-
requires-python = ">=3.11"
|
|
7
|
+
requires-python = ">=3.11,<3.14"
|
|
8
8
|
authors = [
|
|
9
9
|
{ name = "Ynewtime", email = "longqiliuye@gmail.com" }
|
|
10
10
|
]
|
|
@@ -49,7 +49,10 @@ Changelog = "https://github.com/Ynewtime/markitai/blob/main/CHANGELOG.md"
|
|
|
49
49
|
markitai = "markitai.cli:app"
|
|
50
50
|
|
|
51
51
|
[project.optional-dependencies]
|
|
52
|
-
|
|
52
|
+
claude-agent = ["claude-agent-sdk>=0.1.0"]
|
|
53
|
+
copilot = ["github-copilot-sdk>=0.1.0"]
|
|
54
|
+
browser = ["playwright>=1.50.0"]
|
|
55
|
+
all = ["claude-agent-sdk>=0.1.0", "github-copilot-sdk>=0.1.0", "playwright>=1.50.0"]
|
|
53
56
|
|
|
54
57
|
[dependency-groups]
|
|
55
58
|
dev = [
|
|
@@ -72,9 +75,13 @@ packages = ["src/markitai"]
|
|
|
72
75
|
testpaths = ["tests"]
|
|
73
76
|
asyncio_mode = "auto"
|
|
74
77
|
asyncio_default_fixture_loop_scope = "function"
|
|
78
|
+
markers = [
|
|
79
|
+
"slow: marks tests as slow (deselect with '-m \"not slow\"')",
|
|
80
|
+
"network: marks tests that require network access (deselect with '-m \"not network\"')",
|
|
81
|
+
]
|
|
75
82
|
|
|
76
83
|
[tool.ruff]
|
|
77
|
-
target-version = "
|
|
84
|
+
target-version = "py313"
|
|
78
85
|
line-length = 88
|
|
79
86
|
src = ["src", "tests"]
|
|
80
87
|
|
|
@@ -115,13 +122,15 @@ skip-magic-trailing-comma = false
|
|
|
115
122
|
line-ending = "auto"
|
|
116
123
|
|
|
117
124
|
[tool.pyright]
|
|
118
|
-
pythonVersion = "3.
|
|
125
|
+
pythonVersion = "3.13"
|
|
119
126
|
typeCheckingMode = "basic"
|
|
120
127
|
include = ["src"]
|
|
121
128
|
exclude = ["tests", "**/__pycache__"]
|
|
122
129
|
venvPath = "../.."
|
|
123
130
|
venv = ".venv"
|
|
124
|
-
|
|
131
|
+
# Allow optional dependencies to be missing (claude-agent-sdk)
|
|
132
|
+
# These are runtime-checked before import using importlib.util.find_spec
|
|
133
|
+
reportMissingImports = "warning"
|
|
125
134
|
reportMissingTypeStubs = false
|
|
126
135
|
reportUnusedImport = true
|
|
127
136
|
reportUnusedVariable = "warning"
|
|
@@ -13,7 +13,7 @@ from pathlib import Path
|
|
|
13
13
|
from typing import TYPE_CHECKING, Any
|
|
14
14
|
|
|
15
15
|
from loguru import logger
|
|
16
|
-
from rich.console import
|
|
16
|
+
from rich.console import Group
|
|
17
17
|
from rich.live import Live
|
|
18
18
|
from rich.panel import Panel
|
|
19
19
|
from rich.progress import (
|
|
@@ -28,9 +28,11 @@ from rich.progress import (
|
|
|
28
28
|
from rich.table import Table
|
|
29
29
|
from rich.text import Text
|
|
30
30
|
|
|
31
|
+
from markitai.cli.console import get_console
|
|
31
32
|
from markitai.constants import DEFAULT_LOG_PANEL_MAX_LINES
|
|
32
33
|
from markitai.json_order import order_report, order_state
|
|
33
34
|
from markitai.security import atomic_write_json
|
|
35
|
+
from markitai.utils.text import format_error_message
|
|
34
36
|
|
|
35
37
|
if TYPE_CHECKING:
|
|
36
38
|
from markitai.config import BatchConfig
|
|
@@ -464,10 +466,15 @@ class BatchProcessor:
|
|
|
464
466
|
self.state_file = self._get_state_file_path()
|
|
465
467
|
self.report_file = self._get_report_file_path()
|
|
466
468
|
self.state: BatchState | None = None
|
|
467
|
-
self.console =
|
|
469
|
+
self.console = get_console()
|
|
468
470
|
# Collect image analysis results for JSON aggregation
|
|
469
471
|
self.image_analysis_results: list[ImageAnalysisResult] = []
|
|
470
472
|
|
|
473
|
+
# Optimization: Lock for state saving to prevent IO congestion
|
|
474
|
+
import threading
|
|
475
|
+
|
|
476
|
+
self._save_lock = threading.Lock()
|
|
477
|
+
|
|
471
478
|
# Live display state (managed by start_live_display/stop_live_display)
|
|
472
479
|
self._live: Live | None = None
|
|
473
480
|
self._log_panel: LogPanel | None = None
|
|
@@ -515,7 +522,7 @@ class BatchProcessor:
|
|
|
515
522
|
"options": key_options,
|
|
516
523
|
}
|
|
517
524
|
hash_str = json.dumps(hash_params, sort_keys=True)
|
|
518
|
-
return hashlib.md5(hash_str.encode()).hexdigest()[:6]
|
|
525
|
+
return hashlib.md5(hash_str.encode(), usedforsecurity=False).hexdigest()[:6]
|
|
519
526
|
|
|
520
527
|
def _get_state_file_path(self) -> Path:
|
|
521
528
|
"""Generate state file path for resume capability.
|
|
@@ -543,11 +550,17 @@ class BatchProcessor:
|
|
|
543
550
|
return base_path
|
|
544
551
|
else: # rename
|
|
545
552
|
seq = 2
|
|
546
|
-
|
|
553
|
+
max_seq = 9999 # Safety limit to prevent infinite loop
|
|
554
|
+
while seq <= max_seq:
|
|
547
555
|
new_path = reports_dir / f"markitai.{self.task_hash}.v{seq}.report.json"
|
|
548
556
|
if not new_path.exists():
|
|
549
557
|
return new_path
|
|
550
558
|
seq += 1
|
|
559
|
+
# Fallback: use timestamp if too many versions exist
|
|
560
|
+
import time
|
|
561
|
+
|
|
562
|
+
ts = int(time.time())
|
|
563
|
+
return reports_dir / f"markitai.{self.task_hash}.{ts}.report.json"
|
|
551
564
|
|
|
552
565
|
def start_live_display(
|
|
553
566
|
self,
|
|
@@ -807,6 +820,7 @@ class BatchProcessor:
|
|
|
807
820
|
Optimized with interval-based throttling:
|
|
808
821
|
- Checks interval BEFORE serialization to avoid unnecessary work
|
|
809
822
|
- Uses minimal serialization when possible
|
|
823
|
+
- Uses thread lock to prevent concurrent disk writes
|
|
810
824
|
|
|
811
825
|
Args:
|
|
812
826
|
force: Force save even if interval hasn't passed
|
|
@@ -816,27 +830,35 @@ class BatchProcessor:
|
|
|
816
830
|
return
|
|
817
831
|
|
|
818
832
|
now = datetime.now().astimezone()
|
|
819
|
-
|
|
833
|
+
# Default to 5 seconds if not specified in config to prevent $O(N^2)$ IO
|
|
834
|
+
interval = getattr(self.config, "state_flush_interval_seconds", 5) or 5
|
|
820
835
|
|
|
821
836
|
# Check interval BEFORE any serialization work (optimization)
|
|
822
|
-
if not force
|
|
837
|
+
if not force:
|
|
823
838
|
last_saved = getattr(self, "_last_state_save", None)
|
|
824
839
|
if last_saved and (now - last_saved).total_seconds() < interval:
|
|
825
840
|
return # Skip: interval not passed, no work done
|
|
826
841
|
|
|
827
|
-
|
|
842
|
+
# Ensure only one thread is writing at a time
|
|
843
|
+
if not self._save_lock.acquire(blocking=force):
|
|
844
|
+
return # Skip if another thread is already saving, unless forced
|
|
828
845
|
|
|
829
|
-
|
|
830
|
-
|
|
846
|
+
try:
|
|
847
|
+
self.state.updated_at = now.isoformat()
|
|
848
|
+
|
|
849
|
+
# Build minimal state document (only what's needed for resume)
|
|
850
|
+
state_data = self.state.to_minimal_dict()
|
|
831
851
|
|
|
832
|
-
|
|
833
|
-
|
|
852
|
+
# Ensure states directory exists
|
|
853
|
+
self.state_file.parent.mkdir(parents=True, exist_ok=True)
|
|
834
854
|
|
|
835
|
-
|
|
836
|
-
|
|
855
|
+
atomic_write_json(self.state_file, state_data, order_func=order_state)
|
|
856
|
+
self._last_state_save = now
|
|
837
857
|
|
|
838
|
-
|
|
839
|
-
|
|
858
|
+
if log:
|
|
859
|
+
logger.info(f"State file saved: {self.state_file.resolve()}")
|
|
860
|
+
finally:
|
|
861
|
+
self._save_lock.release()
|
|
840
862
|
|
|
841
863
|
def _compute_summary(self) -> dict[str, Any]:
|
|
842
864
|
"""Compute summary statistics for report."""
|
|
@@ -1135,8 +1157,10 @@ class BatchProcessor:
|
|
|
1135
1157
|
|
|
1136
1158
|
except Exception as e:
|
|
1137
1159
|
file_state.status = FileStatus.FAILED
|
|
1138
|
-
file_state.error =
|
|
1139
|
-
logger.error(
|
|
1160
|
+
file_state.error = format_error_message(e)
|
|
1161
|
+
logger.error(
|
|
1162
|
+
f"Failed to process {file_path.name}: {format_error_message(e)}"
|
|
1163
|
+
)
|
|
1140
1164
|
|
|
1141
1165
|
finally:
|
|
1142
1166
|
end_time = asyncio.get_event_loop().time()
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
"""CLI package for Markitai.
|
|
2
|
+
|
|
3
|
+
This package provides the command-line interface for Markitai.
|
|
4
|
+
|
|
5
|
+
Usage:
|
|
6
|
+
from markitai.cli import app
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
# Re-export CLI app
|
|
12
|
+
from markitai.cli.main import app
|
|
13
|
+
|
|
14
|
+
# Re-export validators from processors
|
|
15
|
+
from markitai.cli.processors.validators import (
|
|
16
|
+
warn_case_sensitivity_mismatches as _warn_case_sensitivity_mismatches,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
# Re-export utilities from refactored modules
|
|
20
|
+
from markitai.utils.cli_helpers import (
|
|
21
|
+
compute_task_hash,
|
|
22
|
+
get_report_file_path,
|
|
23
|
+
is_url,
|
|
24
|
+
sanitize_filename,
|
|
25
|
+
url_to_filename,
|
|
26
|
+
)
|
|
27
|
+
from markitai.utils.output import resolve_output_path
|
|
28
|
+
from markitai.utils.progress import ProgressReporter
|
|
29
|
+
|
|
30
|
+
# Re-export from workflow helpers
|
|
31
|
+
from markitai.workflow.helpers import write_images_json
|
|
32
|
+
|
|
33
|
+
# Re-export types from workflow for backward compatibility
|
|
34
|
+
from markitai.workflow.single import ImageAnalysisResult
|
|
35
|
+
|
|
36
|
+
# Backward compatibility alias (deprecated, use sanitize_filename instead)
|
|
37
|
+
_sanitize_filename = sanitize_filename
|
|
38
|
+
|
|
39
|
+
__all__ = [
|
|
40
|
+
"app",
|
|
41
|
+
"ProgressReporter",
|
|
42
|
+
"is_url",
|
|
43
|
+
"url_to_filename",
|
|
44
|
+
"sanitize_filename",
|
|
45
|
+
"_sanitize_filename", # Deprecated alias
|
|
46
|
+
"_warn_case_sensitivity_mismatches",
|
|
47
|
+
"compute_task_hash",
|
|
48
|
+
"get_report_file_path",
|
|
49
|
+
"resolve_output_path",
|
|
50
|
+
"write_images_json",
|
|
51
|
+
"ImageAnalysisResult",
|
|
52
|
+
]
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""CLI commands package.
|
|
2
|
+
|
|
3
|
+
This package contains CLI command groups for Markitai.
|
|
4
|
+
|
|
5
|
+
Available command groups:
|
|
6
|
+
- config: Configuration management commands
|
|
7
|
+
- cache: Cache management commands
|
|
8
|
+
- doctor: System health and dependency checking command
|
|
9
|
+
- check_deps: Alias for doctor (backward compatibility)
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
from __future__ import annotations
|
|
13
|
+
|
|
14
|
+
from markitai.cli.commands.cache import cache
|
|
15
|
+
from markitai.cli.commands.config import config
|
|
16
|
+
from markitai.cli.commands.doctor import check_deps, doctor
|
|
17
|
+
|
|
18
|
+
__all__ = ["cache", "config", "doctor", "check_deps"]
|