markitai 0.3.1__tar.gz → 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (164)
  1. {markitai-0.3.1 → markitai-0.4.1}/.gitignore +7 -7
  2. {markitai-0.3.1 → markitai-0.4.1}/PKG-INFO +41 -6
  3. {markitai-0.3.1 → markitai-0.4.1}/README.md +29 -3
  4. {markitai-0.3.1 → markitai-0.4.1}/pyproject.toml +16 -7
  5. markitai-0.4.1/src/markitai/__init__.py +3 -0
  6. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/batch.py +41 -17
  7. markitai-0.4.1/src/markitai/cli/__init__.py +52 -0
  8. markitai-0.4.1/src/markitai/cli/commands/__init__.py +18 -0
  9. markitai-0.4.1/src/markitai/cli/commands/cache.py +292 -0
  10. markitai-0.4.1/src/markitai/cli/commands/config.py +240 -0
  11. markitai-0.4.1/src/markitai/cli/commands/doctor.py +561 -0
  12. markitai-0.4.1/src/markitai/cli/console.py +50 -0
  13. markitai-0.4.1/src/markitai/cli/framework.py +130 -0
  14. markitai-0.4.1/src/markitai/cli/logging_config.py +377 -0
  15. markitai-0.4.1/src/markitai/cli/main.py +1036 -0
  16. markitai-0.4.1/src/markitai/cli/processors/__init__.py +47 -0
  17. markitai-0.4.1/src/markitai/cli/processors/batch.py +877 -0
  18. markitai-0.4.1/src/markitai/cli/processors/file.py +226 -0
  19. markitai-0.4.1/src/markitai/cli/processors/llm.py +383 -0
  20. markitai-0.4.1/src/markitai/cli/processors/url.py +1050 -0
  21. markitai-0.4.1/src/markitai/cli/processors/validators.py +265 -0
  22. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/config.py +85 -27
  23. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/config.schema.json +23 -35
  24. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/constants.py +51 -9
  25. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/converter/pdf.py +2 -2
  26. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/fetch.py +606 -887
  27. markitai-0.4.1/src/markitai/fetch_playwright.py +482 -0
  28. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/image.py +45 -5
  29. markitai-0.4.1/src/markitai/llm/__init__.py +100 -0
  30. markitai-0.4.1/src/markitai/llm/cache.py +521 -0
  31. markitai-0.4.1/src/markitai/llm/content.py +632 -0
  32. markitai-0.4.1/src/markitai/llm/document.py +1525 -0
  33. markitai-0.4.1/src/markitai/llm/models.py +205 -0
  34. markitai-0.4.1/src/markitai/llm/processor.py +2361 -0
  35. markitai-0.4.1/src/markitai/llm/types.py +201 -0
  36. markitai-0.4.1/src/markitai/llm/vision.py +866 -0
  37. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/ocr.py +2 -3
  38. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/__init__.py +6 -3
  39. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/cleaner_system.md +10 -0
  40. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/document_enhance_complete_system.md +40 -6
  41. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/document_enhance_system.md +32 -1
  42. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/document_process_system.md +19 -2
  43. markitai-0.4.1/src/markitai/prompts/document_vision_system.md +117 -0
  44. markitai-0.4.1/src/markitai/prompts/document_vision_user.md +5 -0
  45. markitai-0.4.1/src/markitai/prompts/screenshot_extract_system.md +76 -0
  46. markitai-0.4.1/src/markitai/prompts/screenshot_extract_user.md +1 -0
  47. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/url_enhance_system.md +30 -8
  48. markitai-0.4.1/src/markitai/providers/__init__.py +695 -0
  49. markitai-0.4.1/src/markitai/providers/auth.py +351 -0
  50. markitai-0.4.1/src/markitai/providers/claude_agent.py +649 -0
  51. markitai-0.4.1/src/markitai/providers/copilot.py +844 -0
  52. markitai-0.4.1/src/markitai/providers/errors.py +225 -0
  53. markitai-0.4.1/src/markitai/providers/json_mode.py +217 -0
  54. markitai-0.4.1/src/markitai/providers/timeout.py +169 -0
  55. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/security.py +88 -6
  56. markitai-0.4.1/src/markitai/utils/__init__.py +69 -0
  57. markitai-0.4.1/src/markitai/utils/cli_helpers.py +171 -0
  58. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/utils/executor.py +13 -0
  59. markitai-0.4.1/src/markitai/utils/frontmatter.py +315 -0
  60. markitai-0.4.1/src/markitai/utils/progress.py +92 -0
  61. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/utils/text.py +110 -0
  62. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/workflow/core.py +68 -36
  63. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/workflow/helpers.py +46 -16
  64. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/workflow/single.py +91 -43
  65. {markitai-0.3.1 → markitai-0.4.1}/tests/integration/test_cache.py +29 -232
  66. {markitai-0.3.1 → markitai-0.4.1}/tests/integration/test_cli.py +1 -1
  67. markitai-0.4.1/tests/integration/test_cli_full.py +914 -0
  68. markitai-0.4.1/tests/integration/test_local_providers.py +855 -0
  69. {markitai-0.3.1 → markitai-0.4.1}/tests/integration/test_output_format.py +5 -4
  70. markitai-0.4.1/tests/integration/test_real_scenarios.py +379 -0
  71. markitai-0.4.1/tests/unit/test_batch_processor.py +1368 -0
  72. markitai-0.4.1/tests/unit/test_cache_cli.py +491 -0
  73. {markitai-0.3.1 → markitai-0.4.1}/tests/unit/test_cli_helpers.py +13 -5
  74. markitai-0.4.1/tests/unit/test_cli_main.py +867 -0
  75. markitai-0.4.1/tests/unit/test_config_cli.py +282 -0
  76. markitai-0.4.1/tests/unit/test_converter_pdf.py +889 -0
  77. markitai-0.4.1/tests/unit/test_deps_cli.py +742 -0
  78. markitai-0.4.1/tests/unit/test_doctor_cli.py +366 -0
  79. markitai-0.4.1/tests/unit/test_document_utils.py +1731 -0
  80. {markitai-0.3.1 → markitai-0.4.1}/tests/unit/test_executor.py +2 -2
  81. markitai-0.4.1/tests/unit/test_fetch.py +3230 -0
  82. markitai-0.4.1/tests/unit/test_fetch_playwright.py +1145 -0
  83. markitai-0.4.1/tests/unit/test_frontmatter.py +422 -0
  84. markitai-0.4.1/tests/unit/test_image.py +2453 -0
  85. {markitai-0.3.1 → markitai-0.4.1}/tests/unit/test_llm.py +63 -79
  86. markitai-0.4.1/tests/unit/test_llm_content.py +266 -0
  87. markitai-0.4.1/tests/unit/test_llm_models.py +545 -0
  88. markitai-0.4.1/tests/unit/test_llm_processor.py +1337 -0
  89. markitai-0.4.1/tests/unit/test_llm_processor_cli.py +1172 -0
  90. {markitai-0.3.1 → markitai-0.4.1}/tests/unit/test_prompts.py +1 -30
  91. markitai-0.4.1/tests/unit/test_provider_auth.py +512 -0
  92. markitai-0.4.1/tests/unit/test_provider_errors.py +307 -0
  93. markitai-0.4.1/tests/unit/test_provider_json_mode.py +236 -0
  94. markitai-0.4.1/tests/unit/test_provider_timeout.py +302 -0
  95. markitai-0.4.1/tests/unit/test_providers.py +1274 -0
  96. markitai-0.4.1/tests/unit/test_security.py +873 -0
  97. markitai-0.4.1/tests/unit/test_url_processor.py +878 -0
  98. markitai-0.4.1/tests/unit/test_utils_text.py +248 -0
  99. markitai-0.4.1/tests/unit/test_vision_mixin.py +1493 -0
  100. markitai-0.4.1/tests/unit/test_vision_utils.py +54 -0
  101. {markitai-0.3.1 → markitai-0.4.1}/tests/unit/test_workflow_core.py +757 -0
  102. {markitai-0.3.1 → markitai-0.4.1}/tests/unit/test_workflow_helpers.py +278 -0
  103. markitai-0.4.1/tests/unit/test_workflow_single.py +711 -0
  104. markitai-0.3.1/src/markitai/__init__.py +0 -3
  105. markitai-0.3.1/src/markitai/cli.py +0 -4081
  106. markitai-0.3.1/src/markitai/llm.py +0 -4474
  107. markitai-0.3.1/src/markitai/prompts/frontmatter_system.md +0 -24
  108. markitai-0.3.1/src/markitai/prompts/frontmatter_user.md +0 -5
  109. markitai-0.3.1/src/markitai/utils/__init__.py +0 -33
  110. markitai-0.3.1/tests/unit/test_fetch.py +0 -789
  111. markitai-0.3.1/tests/unit/test_image.py +0 -781
  112. markitai-0.3.1/tests/unit/test_security.py +0 -324
  113. markitai-0.3.1/tests/unit/test_workflow_single.py +0 -353
  114. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/converter/__init__.py +0 -0
  115. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/converter/_patches.py +0 -0
  116. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/converter/base.py +0 -0
  117. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/converter/image.py +0 -0
  118. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/converter/legacy.py +0 -0
  119. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/converter/office.py +0 -0
  120. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/converter/text.py +0 -0
  121. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/json_order.py +0 -0
  122. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/cleaner_user.md +0 -0
  123. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/document_enhance_complete_user.md +0 -0
  124. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/document_enhance_user.md +0 -0
  125. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/document_process_user.md +0 -0
  126. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/image_analysis_system.md +0 -0
  127. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/image_analysis_user.md +0 -0
  128. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/image_caption_system.md +0 -0
  129. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/image_caption_user.md +0 -0
  130. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/image_description_system.md +0 -0
  131. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/image_description_user.md +0 -0
  132. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/page_content_system.md +0 -0
  133. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/page_content_user.md +0 -0
  134. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/prompts/url_enhance_user.md +0 -0
  135. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/types.py +0 -0
  136. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/urls.py +0 -0
  137. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/utils/mime.py +0 -0
  138. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/utils/office.py +0 -0
  139. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/utils/output.py +0 -0
  140. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/utils/paths.py +0 -0
  141. {markitai-0.3.1 → markitai-0.4.1}/src/markitai/workflow/__init__.py +0 -0
  142. {markitai-0.3.1 → markitai-0.4.1}/tests/SKILL.md +0 -0
  143. {markitai-0.3.1 → markitai-0.4.1}/tests/__init__.py +0 -0
  144. {markitai-0.3.1 → markitai-0.4.1}/tests/conftest.py +0 -0
  145. {markitai-0.3.1 → markitai-0.4.1}/tests/fixtures/Free_Test_Data_500KB_PPTX.pptx +0 -0
  146. {markitai-0.3.1 → markitai-0.4.1}/tests/fixtures/candy.JPG +0 -0
  147. {markitai-0.3.1 → markitai-0.4.1}/tests/fixtures/file-example_PDF_500_kB.pdf +0 -0
  148. {markitai-0.3.1 → markitai-0.4.1}/tests/fixtures/file_example_XLSX_100.xlsx +0 -0
  149. {markitai-0.3.1 → markitai-0.4.1}/tests/fixtures/sub_dir/file-sample_100kB.doc +0 -0
  150. {markitai-0.3.1 → markitai-0.4.1}/tests/fixtures/sub_dir/file_example_PPT_250kB.ppt +0 -0
  151. {markitai-0.3.1 → markitai-0.4.1}/tests/fixtures/sub_dir/file_example_XLS_100.xls +0 -0
  152. {markitai-0.3.1 → markitai-0.4.1}/tests/fixtures/test.urls +0 -0
  153. {markitai-0.3.1 → markitai-0.4.1}/tests/integration/__init__.py +0 -0
  154. {markitai-0.3.1 → markitai-0.4.1}/tests/integration/test_url.py +0 -0
  155. {markitai-0.3.1 → markitai-0.4.1}/tests/unit/__init__.py +0 -0
  156. {markitai-0.3.1 → markitai-0.4.1}/tests/unit/test_atomic.py +0 -0
  157. {markitai-0.3.1 → markitai-0.4.1}/tests/unit/test_batch.py +0 -0
  158. {markitai-0.3.1 → markitai-0.4.1}/tests/unit/test_config.py +0 -0
  159. {markitai-0.3.1 → markitai-0.4.1}/tests/unit/test_converter.py +0 -0
  160. {markitai-0.3.1 → markitai-0.4.1}/tests/unit/test_image_converter.py +0 -0
  161. {markitai-0.3.1 → markitai-0.4.1}/tests/unit/test_json_order.py +0 -0
  162. {markitai-0.3.1 → markitai-0.4.1}/tests/unit/test_llm_runtime.py +0 -0
  163. {markitai-0.3.1 → markitai-0.4.1}/tests/unit/test_ocr.py +0 -0
  164. {markitai-0.3.1 → markitai-0.4.1}/tests/unit/test_schema_sync.py +0 -0
@@ -38,6 +38,7 @@ ENV/
38
38
  # Testing
39
39
  .pytest_cache/
40
40
  .coverage
41
+ coverage.xml
41
42
  htmlcov/
42
43
  .tox/
43
44
  .nox/
@@ -46,6 +47,9 @@ htmlcov/
46
47
  .mypy_cache/
47
48
  .pytype/
48
49
 
50
+ # Linting
51
+ .ruff_cache/
52
+
49
53
  # Markitai output
50
54
  output/
51
55
  output-*/
@@ -55,6 +59,7 @@ markitai.json
55
59
 
56
60
  # Logs
57
61
  logs/
62
+ logs_*/
58
63
  *.log
59
64
 
60
65
  # Environment variables (API keys)
@@ -66,13 +71,8 @@ logs/
66
71
  .DS_Store
67
72
  Thumbs.db
68
73
 
69
- # SQLite cache (including WAL mode files)
70
- cache.db
71
- cache.db-wal
72
- cache.db-shm
73
- *.db-wal
74
- *.db-shm
75
- fetch_cache.db
74
+ # Markitai cache directory
75
+ .markitai/
76
76
 
77
77
  # VitePress (website)
78
78
  website/node_modules/
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: markitai
3
- Version: 0.3.1
4
- Summary: Document to Markdown converter with LLM enhancement
3
+ Version: 0.4.1
4
+ Summary: Opinionated Markdown converter with native LLM enhancement support
5
5
  Project-URL: Homepage, https://markitai.ynewtime.com
6
6
  Project-URL: Documentation, https://markitai.ynewtime.com/guide/getting-started
7
7
  Project-URL: Repository, https://github.com/Ynewtime/markitai
@@ -20,7 +20,7 @@ Classifier: Programming Language :: Python :: 3.12
20
20
  Classifier: Programming Language :: Python :: 3.13
21
21
  Classifier: Topic :: Text Processing :: Markup :: Markdown
22
22
  Classifier: Topic :: Utilities
23
- Requires-Python: >=3.11
23
+ Requires-Python: <3.14,>=3.11
24
24
  Requires-Dist: aiofiles>=25.1.0
25
25
  Requires-Dist: click>=8.1.0
26
26
  Requires-Dist: instructor>=1.14.0
@@ -36,10 +36,21 @@ Requires-Dist: pywin32>=310; sys_platform == 'win32'
36
36
  Requires-Dist: rapidocr>=3.5.0
37
37
  Requires-Dist: rich>=14.2.0
38
38
  Provides-Extra: all
39
+ Requires-Dist: claude-agent-sdk>=0.1.0; extra == 'all'
40
+ Requires-Dist: github-copilot-sdk>=0.1.0; extra == 'all'
41
+ Requires-Dist: playwright>=1.50.0; extra == 'all'
42
+ Provides-Extra: browser
43
+ Requires-Dist: playwright>=1.50.0; extra == 'browser'
44
+ Provides-Extra: claude-agent
45
+ Requires-Dist: claude-agent-sdk>=0.1.0; extra == 'claude-agent'
46
+ Provides-Extra: copilot
47
+ Requires-Dist: github-copilot-sdk>=0.1.0; extra == 'copilot'
39
48
  Description-Content-Type: text/markdown
40
49
 
41
50
  # Markitai
42
51
 
52
+ English | [简体中文](./README_ZH.md)
53
+
43
54
  Opinionated Markdown converter with native LLM enhancement support.
44
55
 
45
56
  ## Features
@@ -66,11 +77,11 @@ irm https://raw.githubusercontent.com/Ynewtime/markitai/main/scripts/setup.ps1 |
66
77
  ### Manual Installation
67
78
 
68
79
  ```bash
69
- # Requires Python 3.11+
80
+ # Requires Python 3.11-3.13 (3.14 not yet supported)
70
81
  uv tool install markitai
71
82
 
72
- # Or using pip
73
- pip install --user markitai
83
+ # Or using uv pip (for virtual environment)
84
+ uv pip install markitai
74
85
  ```
75
86
 
76
87
  ## Quick Start
@@ -129,10 +140,34 @@ markitai cache stats
129
140
 
130
141
  # Clear cache
131
142
  markitai cache clear
143
+
144
+ # Check system health and dependencies
145
+ markitai doctor
132
146
  ```
133
147
 
134
148
  Config file location: `./markitai.json` or `~/.markitai/config.json`
135
149
 
150
+ ### Local Providers (Subscription-based)
151
+
152
+ Use your existing Claude Code or GitHub Copilot subscription:
153
+
154
+ ```bash
155
+ # Claude Agent (requires Claude Code CLI)
156
+ markitai document.pdf --llm # Configure claude-agent/sonnet in config
157
+
158
+ # GitHub Copilot (requires Copilot CLI)
159
+ markitai document.pdf --llm # Configure copilot/gpt-5.2 in config
160
+ ```
161
+
162
+ Install CLI tools:
163
+ ```bash
164
+ # Claude Code CLI
165
+ curl -fsSL https://claude.ai/install.sh | bash
166
+
167
+ # GitHub Copilot CLI
168
+ curl -fsSL https://gh.io/copilot-install | bash
169
+ ```
170
+
136
171
  ## Environment Variables
137
172
 
138
173
  | Variable | Description |
@@ -1,5 +1,7 @@
1
1
  # Markitai
2
2
 
3
+ English | [简体中文](./README_ZH.md)
4
+
3
5
  Opinionated Markdown converter with native LLM enhancement support.
4
6
 
5
7
  ## Features
@@ -26,11 +28,11 @@ irm https://raw.githubusercontent.com/Ynewtime/markitai/main/scripts/setup.ps1 |
26
28
  ### Manual Installation
27
29
 
28
30
  ```bash
29
- # Requires Python 3.11+
31
+ # Requires Python 3.11-3.13 (3.14 not yet supported)
30
32
  uv tool install markitai
31
33
 
32
- # Or using pip
33
- pip install --user markitai
34
+ # Or using uv pip (for virtual environment)
35
+ uv pip install markitai
34
36
  ```
35
37
 
36
38
  ## Quick Start
@@ -89,10 +91,34 @@ markitai cache stats
89
91
 
90
92
  # Clear cache
91
93
  markitai cache clear
94
+
95
+ # Check system health and dependencies
96
+ markitai doctor
92
97
  ```
93
98
 
94
99
  Config file location: `./markitai.json` or `~/.markitai/config.json`
95
100
 
101
+ ### Local Providers (Subscription-based)
102
+
103
+ Use your existing Claude Code or GitHub Copilot subscription:
104
+
105
+ ```bash
106
+ # Claude Agent (requires Claude Code CLI)
107
+ markitai document.pdf --llm # Configure claude-agent/sonnet in config
108
+
109
+ # GitHub Copilot (requires Copilot CLI)
110
+ markitai document.pdf --llm # Configure copilot/gpt-5.2 in config
111
+ ```
112
+
113
+ Install CLI tools:
114
+ ```bash
115
+ # Claude Code CLI
116
+ curl -fsSL https://claude.ai/install.sh | bash
117
+
118
+ # GitHub Copilot CLI
119
+ curl -fsSL https://gh.io/copilot-install | bash
120
+ ```
121
+
96
122
  ## Environment Variables
97
123
 
98
124
  | Variable | Description |
@@ -1,10 +1,10 @@
1
1
  [project]
2
2
  name = "markitai"
3
- version = "0.3.1"
4
- description = "Document to Markdown converter with LLM enhancement"
3
+ version = "0.4.1"
4
+ description = "Opinionated Markdown converter with native LLM enhancement support"
5
5
  license = "MIT"
6
6
  readme = "README.md"
7
- requires-python = ">=3.11"
7
+ requires-python = ">=3.11,<3.14"
8
8
  authors = [
9
9
  { name = "Ynewtime", email = "longqiliuye@gmail.com" }
10
10
  ]
@@ -49,7 +49,10 @@ Changelog = "https://github.com/Ynewtime/markitai/blob/main/CHANGELOG.md"
49
49
  markitai = "markitai.cli:app"
50
50
 
51
51
  [project.optional-dependencies]
52
- all = []
52
+ claude-agent = ["claude-agent-sdk>=0.1.0"]
53
+ copilot = ["github-copilot-sdk>=0.1.0"]
54
+ browser = ["playwright>=1.50.0"]
55
+ all = ["claude-agent-sdk>=0.1.0", "github-copilot-sdk>=0.1.0", "playwright>=1.50.0"]
53
56
 
54
57
  [dependency-groups]
55
58
  dev = [
@@ -72,9 +75,13 @@ packages = ["src/markitai"]
72
75
  testpaths = ["tests"]
73
76
  asyncio_mode = "auto"
74
77
  asyncio_default_fixture_loop_scope = "function"
78
+ markers = [
79
+ "slow: marks tests as slow (deselect with '-m \"not slow\"')",
80
+ "network: marks tests that require network access (deselect with '-m \"not network\"')",
81
+ ]
75
82
 
76
83
  [tool.ruff]
77
- target-version = "py311"
84
+ target-version = "py313"
78
85
  line-length = 88
79
86
  src = ["src", "tests"]
80
87
 
@@ -115,13 +122,15 @@ skip-magic-trailing-comma = false
115
122
  line-ending = "auto"
116
123
 
117
124
  [tool.pyright]
118
- pythonVersion = "3.11"
125
+ pythonVersion = "3.13"
119
126
  typeCheckingMode = "basic"
120
127
  include = ["src"]
121
128
  exclude = ["tests", "**/__pycache__"]
122
129
  venvPath = "../.."
123
130
  venv = ".venv"
124
- reportMissingImports = true
131
+ # Allow optional dependencies to be missing (claude-agent-sdk)
132
+ # These are runtime-checked before import using importlib.util.find_spec
133
+ reportMissingImports = "warning"
125
134
  reportMissingTypeStubs = false
126
135
  reportUnusedImport = true
127
136
  reportUnusedVariable = "warning"
@@ -0,0 +1,3 @@
1
+ """Markitai - Opinionated Markdown converter with native LLM enhancement support."""
2
+
3
+ __version__ = "0.4.1"
@@ -13,7 +13,7 @@ from pathlib import Path
13
13
  from typing import TYPE_CHECKING, Any
14
14
 
15
15
  from loguru import logger
16
- from rich.console import Console, Group
16
+ from rich.console import Group
17
17
  from rich.live import Live
18
18
  from rich.panel import Panel
19
19
  from rich.progress import (
@@ -28,9 +28,11 @@ from rich.progress import (
28
28
  from rich.table import Table
29
29
  from rich.text import Text
30
30
 
31
+ from markitai.cli.console import get_console
31
32
  from markitai.constants import DEFAULT_LOG_PANEL_MAX_LINES
32
33
  from markitai.json_order import order_report, order_state
33
34
  from markitai.security import atomic_write_json
35
+ from markitai.utils.text import format_error_message
34
36
 
35
37
  if TYPE_CHECKING:
36
38
  from markitai.config import BatchConfig
@@ -464,10 +466,15 @@ class BatchProcessor:
464
466
  self.state_file = self._get_state_file_path()
465
467
  self.report_file = self._get_report_file_path()
466
468
  self.state: BatchState | None = None
467
- self.console = Console()
469
+ self.console = get_console()
468
470
  # Collect image analysis results for JSON aggregation
469
471
  self.image_analysis_results: list[ImageAnalysisResult] = []
470
472
 
473
+ # Optimization: Lock for state saving to prevent IO congestion
474
+ import threading
475
+
476
+ self._save_lock = threading.Lock()
477
+
471
478
  # Live display state (managed by start_live_display/stop_live_display)
472
479
  self._live: Live | None = None
473
480
  self._log_panel: LogPanel | None = None
@@ -515,7 +522,7 @@ class BatchProcessor:
515
522
  "options": key_options,
516
523
  }
517
524
  hash_str = json.dumps(hash_params, sort_keys=True)
518
- return hashlib.md5(hash_str.encode()).hexdigest()[:6]
525
+ return hashlib.md5(hash_str.encode(), usedforsecurity=False).hexdigest()[:6]
519
526
 
520
527
  def _get_state_file_path(self) -> Path:
521
528
  """Generate state file path for resume capability.
@@ -543,11 +550,17 @@ class BatchProcessor:
543
550
  return base_path
544
551
  else: # rename
545
552
  seq = 2
546
- while True:
553
+ max_seq = 9999 # Safety limit to prevent infinite loop
554
+ while seq <= max_seq:
547
555
  new_path = reports_dir / f"markitai.{self.task_hash}.v{seq}.report.json"
548
556
  if not new_path.exists():
549
557
  return new_path
550
558
  seq += 1
559
+ # Fallback: use timestamp if too many versions exist
560
+ import time
561
+
562
+ ts = int(time.time())
563
+ return reports_dir / f"markitai.{self.task_hash}.{ts}.report.json"
551
564
 
552
565
  def start_live_display(
553
566
  self,
@@ -807,6 +820,7 @@ class BatchProcessor:
807
820
  Optimized with interval-based throttling:
808
821
  - Checks interval BEFORE serialization to avoid unnecessary work
809
822
  - Uses minimal serialization when possible
823
+ - Uses thread lock to prevent concurrent disk writes
810
824
 
811
825
  Args:
812
826
  force: Force save even if interval hasn't passed
@@ -816,27 +830,35 @@ class BatchProcessor:
816
830
  return
817
831
 
818
832
  now = datetime.now().astimezone()
819
- interval = getattr(self.config, "state_flush_interval_seconds", 0) or 0
833
+ # Default to 5 seconds if not specified in config to prevent $O(N^2)$ IO
834
+ interval = getattr(self.config, "state_flush_interval_seconds", 5) or 5
820
835
 
821
836
  # Check interval BEFORE any serialization work (optimization)
822
- if not force and interval > 0:
837
+ if not force:
823
838
  last_saved = getattr(self, "_last_state_save", None)
824
839
  if last_saved and (now - last_saved).total_seconds() < interval:
825
840
  return # Skip: interval not passed, no work done
826
841
 
827
- self.state.updated_at = now.isoformat()
842
+ # Ensure only one thread is writing at a time
843
+ if not self._save_lock.acquire(blocking=force):
844
+ return # Skip if another thread is already saving, unless forced
828
845
 
829
- # Build minimal state document (only what's needed for resume)
830
- state_data = self.state.to_minimal_dict()
846
+ try:
847
+ self.state.updated_at = now.isoformat()
848
+
849
+ # Build minimal state document (only what's needed for resume)
850
+ state_data = self.state.to_minimal_dict()
831
851
 
832
- # Ensure states directory exists
833
- self.state_file.parent.mkdir(parents=True, exist_ok=True)
852
+ # Ensure states directory exists
853
+ self.state_file.parent.mkdir(parents=True, exist_ok=True)
834
854
 
835
- atomic_write_json(self.state_file, state_data, order_func=order_state)
836
- self._last_state_save = now
855
+ atomic_write_json(self.state_file, state_data, order_func=order_state)
856
+ self._last_state_save = now
837
857
 
838
- if log:
839
- logger.info(f"State file saved: {self.state_file.resolve()}")
858
+ if log:
859
+ logger.info(f"State file saved: {self.state_file.resolve()}")
860
+ finally:
861
+ self._save_lock.release()
840
862
 
841
863
  def _compute_summary(self) -> dict[str, Any]:
842
864
  """Compute summary statistics for report."""
@@ -1135,8 +1157,10 @@ class BatchProcessor:
1135
1157
 
1136
1158
  except Exception as e:
1137
1159
  file_state.status = FileStatus.FAILED
1138
- file_state.error = str(e)
1139
- logger.error(f"Failed to process {file_path.name}: {e}")
1160
+ file_state.error = format_error_message(e)
1161
+ logger.error(
1162
+ f"Failed to process {file_path.name}: {format_error_message(e)}"
1163
+ )
1140
1164
 
1141
1165
  finally:
1142
1166
  end_time = asyncio.get_event_loop().time()
@@ -0,0 +1,52 @@
1
+ """CLI package for Markitai.
2
+
3
+ This package provides the command-line interface for Markitai.
4
+
5
+ Usage:
6
+ from markitai.cli import app
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ # Re-export CLI app
12
+ from markitai.cli.main import app
13
+
14
+ # Re-export validators from processors
15
+ from markitai.cli.processors.validators import (
16
+ warn_case_sensitivity_mismatches as _warn_case_sensitivity_mismatches,
17
+ )
18
+
19
+ # Re-export utilities from refactored modules
20
+ from markitai.utils.cli_helpers import (
21
+ compute_task_hash,
22
+ get_report_file_path,
23
+ is_url,
24
+ sanitize_filename,
25
+ url_to_filename,
26
+ )
27
+ from markitai.utils.output import resolve_output_path
28
+ from markitai.utils.progress import ProgressReporter
29
+
30
+ # Re-export from workflow helpers
31
+ from markitai.workflow.helpers import write_images_json
32
+
33
+ # Re-export types from workflow for backward compatibility
34
+ from markitai.workflow.single import ImageAnalysisResult
35
+
36
+ # Backward compatibility alias (deprecated, use sanitize_filename instead)
37
+ _sanitize_filename = sanitize_filename
38
+
39
+ __all__ = [
40
+ "app",
41
+ "ProgressReporter",
42
+ "is_url",
43
+ "url_to_filename",
44
+ "sanitize_filename",
45
+ "_sanitize_filename", # Deprecated alias
46
+ "_warn_case_sensitivity_mismatches",
47
+ "compute_task_hash",
48
+ "get_report_file_path",
49
+ "resolve_output_path",
50
+ "write_images_json",
51
+ "ImageAnalysisResult",
52
+ ]
@@ -0,0 +1,18 @@
1
+ """CLI commands package.
2
+
3
+ This package contains CLI command groups for Markitai.
4
+
5
+ Available command groups:
6
+ - config: Configuration management commands
7
+ - cache: Cache management commands
8
+ - doctor: System health and dependency checking command
9
+ - check_deps: Alias for doctor (backward compatibility)
10
+ """
11
+
12
+ from __future__ import annotations
13
+
14
+ from markitai.cli.commands.cache import cache
15
+ from markitai.cli.commands.config import config
16
+ from markitai.cli.commands.doctor import check_deps, doctor
17
+
18
+ __all__ = ["cache", "config", "doctor", "check_deps"]