PyPI - markitai - Versions diffs - 0.3.1__tar.gz → 0.4.1__tar.gz - Mend

markitai 0.3.1__tar.gz → 0.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (164) hide show

{markitai-0.3.1 → markitai-0.4.1}/.gitignore RENAMED Viewed

@@ -38,6 +38,7 @@ ENV/
 # Testing
 .pytest_cache/
 .coverage
+coverage.xml
 htmlcov/
 .tox/
 .nox/
@@ -46,6 +47,9 @@ htmlcov/
 .mypy_cache/
 .pytype/
+# Linting
+.ruff_cache/
 # Markitai output
 output/
 output-*/
@@ -55,6 +59,7 @@ markitai.json
 # Logs
 logs/
+logs_*/
 *.log
 # Environment variables (API keys)
@@ -66,13 +71,8 @@ logs/
 .DS_Store
 Thumbs.db
-# SQLite cache (including WAL mode files)
-cache.db
-cache.db-wal
-cache.db-shm
-*.db-wal
-*.db-shm
-fetch_cache.db
+# Markitai cache directory
+.markitai/
 # VitePress (website)
 website/node_modules/

{markitai-0.3.1 → markitai-0.4.1}/PKG-INFO RENAMED Viewed

@@ -1,7 +1,7 @@
 Metadata-Version: 2.4
 Name: markitai
-Version: 0.3.1
-Summary: Document to Markdown converter with LLM enhancement
+Version: 0.4.1
+Summary: Opinionated Markdown converter with native LLM enhancement support
 Project-URL: Homepage, https://markitai.ynewtime.com
 Project-URL: Documentation, https://markitai.ynewtime.com/guide/getting-started
 Project-URL: Repository, https://github.com/Ynewtime/markitai
@@ -20,7 +20,7 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Text Processing :: Markup :: Markdown
 Classifier: Topic :: Utilities
-Requires-Python: >=3.11
+Requires-Python: <3.14,>=3.11
 Requires-Dist: aiofiles>=25.1.0
 Requires-Dist: click>=8.1.0
 Requires-Dist: instructor>=1.14.0
@@ -36,10 +36,21 @@ Requires-Dist: pywin32>=310; sys_platform == 'win32'
 Requires-Dist: rapidocr>=3.5.0
 Requires-Dist: rich>=14.2.0
 Provides-Extra: all
+Requires-Dist: claude-agent-sdk>=0.1.0; extra == 'all'
+Requires-Dist: github-copilot-sdk>=0.1.0; extra == 'all'
+Requires-Dist: playwright>=1.50.0; extra == 'all'
+Provides-Extra: browser
+Requires-Dist: playwright>=1.50.0; extra == 'browser'
+Provides-Extra: claude-agent
+Requires-Dist: claude-agent-sdk>=0.1.0; extra == 'claude-agent'
+Provides-Extra: copilot
+Requires-Dist: github-copilot-sdk>=0.1.0; extra == 'copilot'
 Description-Content-Type: text/markdown
 # Markitai
+English | [简体中文](./README_ZH.md)
 Opinionated Markdown converter with native LLM enhancement support.
 ## Features
@@ -66,11 +77,11 @@ irm https://raw.githubusercontent.com/Ynewtime/markitai/main/scripts/setup.ps1 |
 ### Manual Installation
 ```bash
-# Requires Python 3.11+
+# Requires Python 3.11-3.13 (3.14 not yet supported)
 uv tool install markitai
-# Or using pip
-pip install --user markitai
+# Or using uv pip (for virtual environment)
+uv pip install markitai
 ```
 ## Quick Start
@@ -129,10 +140,34 @@ markitai cache stats
 # Clear cache
 markitai cache clear
+# Check system health and dependencies
+markitai doctor
 ```
 Config file location: `./markitai.json` or `~/.markitai/config.json`
+### Local Providers (Subscription-based)
+Use your existing Claude Code or GitHub Copilot subscription:
+```bash
+# Claude Agent (requires Claude Code CLI)
+markitai document.pdf --llm  # Configure claude-agent/sonnet in config
+# GitHub Copilot (requires Copilot CLI)
+markitai document.pdf --llm  # Configure copilot/gpt-5.2 in config
+```
+Install CLI tools:
+```bash
+# Claude Code CLI
+curl -fsSL https://claude.ai/install.sh | bash
+# GitHub Copilot CLI
+curl -fsSL https://gh.io/copilot-install | bash
+```
 ## Environment Variables
 | Variable | Description |

{markitai-0.3.1 → markitai-0.4.1}/README.md RENAMED Viewed

@@ -1,5 +1,7 @@
 # Markitai
+English | [简体中文](./README_ZH.md)
 Opinionated Markdown converter with native LLM enhancement support.
 ## Features
@@ -26,11 +28,11 @@ irm https://raw.githubusercontent.com/Ynewtime/markitai/main/scripts/setup.ps1 |
 ### Manual Installation
 ```bash
-# Requires Python 3.11+
+# Requires Python 3.11-3.13 (3.14 not yet supported)
 uv tool install markitai
-# Or using pip
-pip install --user markitai
+# Or using uv pip (for virtual environment)
+uv pip install markitai
 ```
 ## Quick Start
@@ -89,10 +91,34 @@ markitai cache stats
 # Clear cache
 markitai cache clear
+# Check system health and dependencies
+markitai doctor
 ```
 Config file location: `./markitai.json` or `~/.markitai/config.json`
+### Local Providers (Subscription-based)
+Use your existing Claude Code or GitHub Copilot subscription:
+```bash
+# Claude Agent (requires Claude Code CLI)
+markitai document.pdf --llm  # Configure claude-agent/sonnet in config
+# GitHub Copilot (requires Copilot CLI)
+markitai document.pdf --llm  # Configure copilot/gpt-5.2 in config
+```
+Install CLI tools:
+```bash
+# Claude Code CLI
+curl -fsSL https://claude.ai/install.sh | bash
+# GitHub Copilot CLI
+curl -fsSL https://gh.io/copilot-install | bash
+```
 ## Environment Variables
 | Variable | Description |

{markitai-0.3.1 → markitai-0.4.1}/pyproject.toml RENAMED Viewed

@@ -1,10 +1,10 @@
 [project]
 name = "markitai"
-version = "0.3.1"
-description = "Document to Markdown converter with LLM enhancement"
+version = "0.4.1"
+description = "Opinionated Markdown converter with native LLM enhancement support"
 license = "MIT"
 readme = "README.md"
-requires-python = ">=3.11"
+requires-python = ">=3.11,<3.14"
 authors = [
     { name = "Ynewtime", email = "longqiliuye@gmail.com" }
 ]
@@ -49,7 +49,10 @@ Changelog = "https://github.com/Ynewtime/markitai/blob/main/CHANGELOG.md"
 markitai = "markitai.cli:app"
 [project.optional-dependencies]
-all = []
+claude-agent = ["claude-agent-sdk>=0.1.0"]
+copilot = ["github-copilot-sdk>=0.1.0"]
+browser = ["playwright>=1.50.0"]
+all = ["claude-agent-sdk>=0.1.0", "github-copilot-sdk>=0.1.0", "playwright>=1.50.0"]
 [dependency-groups]
 dev = [
@@ -72,9 +75,13 @@ packages = ["src/markitai"]
 testpaths = ["tests"]
 asyncio_mode = "auto"
 asyncio_default_fixture_loop_scope = "function"
+markers = [
+    "slow: marks tests as slow (deselect with '-m \"not slow\"')",
+    "network: marks tests that require network access (deselect with '-m \"not network\"')",
+]
 [tool.ruff]
-target-version = "py311"
+target-version = "py313"
 line-length = 88
 src = ["src", "tests"]
@@ -115,13 +122,15 @@ skip-magic-trailing-comma = false
 line-ending = "auto"
 [tool.pyright]
-pythonVersion = "3.11"
+pythonVersion = "3.13"
 typeCheckingMode = "basic"
 include = ["src"]
 exclude = ["tests", "**/__pycache__"]
 venvPath = "../.."
 venv = ".venv"
-reportMissingImports = true
+# Allow optional dependencies to be missing (claude-agent-sdk)
+# These are runtime-checked before import using importlib.util.find_spec
+reportMissingImports = "warning"
 reportMissingTypeStubs = false
 reportUnusedImport = true
 reportUnusedVariable = "warning"

markitai-0.4.1/src/markitai/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""Markitai - Opinionated Markdown converter with native LLM enhancement support."""
+__version__ = "0.4.1"

{markitai-0.3.1 → markitai-0.4.1}/src/markitai/batch.py RENAMED Viewed

@@ -13,7 +13,7 @@ from pathlib import Path
 from typing import TYPE_CHECKING, Any
 from loguru import logger
-from rich.console import Console, Group
+from rich.console import Group
 from rich.live import Live
 from rich.panel import Panel
 from rich.progress import (
@@ -28,9 +28,11 @@ from rich.progress import (
 from rich.table import Table
 from rich.text import Text
+from markitai.cli.console import get_console
 from markitai.constants import DEFAULT_LOG_PANEL_MAX_LINES
 from markitai.json_order import order_report, order_state
 from markitai.security import atomic_write_json
+from markitai.utils.text import format_error_message
 if TYPE_CHECKING:
     from markitai.config import BatchConfig
@@ -464,10 +466,15 @@ class BatchProcessor:
         self.state_file = self._get_state_file_path()
         self.report_file = self._get_report_file_path()
         self.state: BatchState | None = None
-        self.console = Console()
+        self.console = get_console()
         # Collect image analysis results for JSON aggregation
         self.image_analysis_results: list[ImageAnalysisResult] = []
+        # Optimization: Lock for state saving to prevent IO congestion
+        import threading
+        self._save_lock = threading.Lock()
         # Live display state (managed by start_live_display/stop_live_display)
         self._live: Live | None = None
         self._log_panel: LogPanel | None = None
@@ -515,7 +522,7 @@ class BatchProcessor:
             "options": key_options,
         }
         hash_str = json.dumps(hash_params, sort_keys=True)
-        return hashlib.md5(hash_str.encode()).hexdigest()[:6]
+        return hashlib.md5(hash_str.encode(), usedforsecurity=False).hexdigest()[:6]
     def _get_state_file_path(self) -> Path:
         """Generate state file path for resume capability.
@@ -543,11 +550,17 @@ class BatchProcessor:
             return base_path
         else:  # rename
             seq = 2
-            while True:
+            max_seq = 9999  # Safety limit to prevent infinite loop
+            while seq <= max_seq:
                 new_path = reports_dir / f"markitai.{self.task_hash}.v{seq}.report.json"
                 if not new_path.exists():
                     return new_path
                 seq += 1
+            # Fallback: use timestamp if too many versions exist
+            import time
+            ts = int(time.time())
+            return reports_dir / f"markitai.{self.task_hash}.{ts}.report.json"
     def start_live_display(
         self,
@@ -807,6 +820,7 @@ class BatchProcessor:
         Optimized with interval-based throttling:
         - Checks interval BEFORE serialization to avoid unnecessary work
         - Uses minimal serialization when possible
+        - Uses thread lock to prevent concurrent disk writes
         Args:
             force: Force save even if interval hasn't passed
@@ -816,27 +830,35 @@ class BatchProcessor:
             return
         now = datetime.now().astimezone()
-        interval = getattr(self.config, "state_flush_interval_seconds", 0) or 0
+        # Default to 5 seconds if not specified in config to prevent $O(N^2)$ IO
+        interval = getattr(self.config, "state_flush_interval_seconds", 5) or 5
         # Check interval BEFORE any serialization work (optimization)
-        if not force and interval > 0:
+        if not force:
             last_saved = getattr(self, "_last_state_save", None)
             if last_saved and (now - last_saved).total_seconds() < interval:
                 return  # Skip: interval not passed, no work done
-        self.state.updated_at = now.isoformat()
+        # Ensure only one thread is writing at a time
+        if not self._save_lock.acquire(blocking=force):
+            return  # Skip if another thread is already saving, unless forced
-        # Build minimal state document (only what's needed for resume)
-        state_data = self.state.to_minimal_dict()
+        try:
+            self.state.updated_at = now.isoformat()
+            # Build minimal state document (only what's needed for resume)
+            state_data = self.state.to_minimal_dict()
-        # Ensure states directory exists
-        self.state_file.parent.mkdir(parents=True, exist_ok=True)
+            # Ensure states directory exists
+            self.state_file.parent.mkdir(parents=True, exist_ok=True)
-        atomic_write_json(self.state_file, state_data, order_func=order_state)
-        self._last_state_save = now
+            atomic_write_json(self.state_file, state_data, order_func=order_state)
+            self._last_state_save = now
-        if log:
-            logger.info(f"State file saved: {self.state_file.resolve()}")
+            if log:
+                logger.info(f"State file saved: {self.state_file.resolve()}")
+        finally:
+            self._save_lock.release()
     def _compute_summary(self) -> dict[str, Any]:
         """Compute summary statistics for report."""
@@ -1135,8 +1157,10 @@ class BatchProcessor:
             except Exception as e:
                 file_state.status = FileStatus.FAILED
-                file_state.error = str(e)
-                logger.error(f"Failed to process {file_path.name}: {e}")
+                file_state.error = format_error_message(e)
+                logger.error(
+                    f"Failed to process {file_path.name}: {format_error_message(e)}"
+                )
             finally:
                 end_time = asyncio.get_event_loop().time()

markitai-0.4.1/src/markitai/cli/__init__.py ADDED Viewed

@@ -0,0 +1,52 @@
+"""CLI package for Markitai.
+This package provides the command-line interface for Markitai.
+Usage:
+    from markitai.cli import app
+"""
+from __future__ import annotations
+# Re-export CLI app
+from markitai.cli.main import app
+# Re-export validators from processors
+from markitai.cli.processors.validators import (
+    warn_case_sensitivity_mismatches as _warn_case_sensitivity_mismatches,
+)
+# Re-export utilities from refactored modules
+from markitai.utils.cli_helpers import (
+    compute_task_hash,
+    get_report_file_path,
+    is_url,
+    sanitize_filename,
+    url_to_filename,
+)
+from markitai.utils.output import resolve_output_path
+from markitai.utils.progress import ProgressReporter
+# Re-export from workflow helpers
+from markitai.workflow.helpers import write_images_json
+# Re-export types from workflow for backward compatibility
+from markitai.workflow.single import ImageAnalysisResult
+# Backward compatibility alias (deprecated, use sanitize_filename instead)
+_sanitize_filename = sanitize_filename
+__all__ = [
+    "app",
+    "ProgressReporter",
+    "is_url",
+    "url_to_filename",
+    "sanitize_filename",
+    "_sanitize_filename",  # Deprecated alias
+    "_warn_case_sensitivity_mismatches",
+    "compute_task_hash",
+    "get_report_file_path",
+    "resolve_output_path",
+    "write_images_json",
+    "ImageAnalysisResult",
+]

markitai-0.4.1/src/markitai/cli/commands/__init__.py ADDED Viewed

@@ -0,0 +1,18 @@
+"""CLI commands package.
+This package contains CLI command groups for Markitai.
+Available command groups:
+- config: Configuration management commands
+- cache: Cache management commands
+- doctor: System health and dependency checking command
+- check_deps: Alias for doctor (backward compatibility)
+"""
+from __future__ import annotations
+from markitai.cli.commands.cache import cache
+from markitai.cli.commands.config import config
+from markitai.cli.commands.doctor import check_deps, doctor
+__all__ = ["cache", "config", "doctor", "check_deps"]

markitai 0.3.1__tar.gz → 0.4.1__tar.gz

markitai 0.3.1__tar.gz → 0.4.1__tar.gz