PyPI - content-core - Versions diffs - 1.2.3__tar.gz → 1.3.1__tar.gz - Mend

content-core 1.2.3 (tar.gz) → 1.3.1 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of content-core might be problematic. Click here for more details.

Files changed (89) [hide / show]

{content_core-1.2.3 → content_core-1.3.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: content-core
-Version: 1.2.3
+Version: 1.3.1
 Summary: Extract what matters from any media source. Available as Python Library, macOS Service, CLI and MCP Server
 Author-email: LUIS NOVO <lfnovo@gmail.com>
 License-File: LICENSE
@@ -11,6 +11,7 @@ Requires-Dist: asciidoc>=10.2.1
 Requires-Dist: bs4>=0.0.2
 Requires-Dist: dicttoxml>=1.7.16
 Requires-Dist: esperanto>=1.2.0
+Requires-Dist: fastmcp>=2.10.0
 Requires-Dist: firecrawl-py>=2.7.0
 Requires-Dist: jinja2>=3.1.6
 Requires-Dist: langdetect>=1.0.9
@@ -32,8 +33,6 @@ Requires-Dist: validators>=0.34.0
 Requires-Dist: youtube-transcript-api>=1.0.3
 Provides-Extra: docling
 Requires-Dist: docling>=2.34.0; extra == 'docling'
-Provides-Extra: mcp
-Requires-Dist: fastmcp>=0.5.0; extra == 'mcp'
 Description-Content-Type: text/markdown
 # Content Core
@@ -118,11 +117,11 @@ pip install content-core
 # With enhanced document processing (adds Docling)
 pip install content-core[docling]
-# With MCP server support
-pip install content-core[mcp]
+# With MCP server support (now included by default)
+pip install content-core
-# Full installation
-pip install content-core[docling,mcp]
+# Full installation (with enhanced document processing)
+pip install content-core[docling]
 ```
 Alternatively, if you’re developing locally:
@@ -268,11 +267,11 @@ Content Core includes a Model Context Protocol (MCP) server that enables seamles
 ### Quick Setup with Claude Desktop
 ```bash
-# Install with MCP support
-pip install content-core[mcp]
+# Install Content Core (MCP server included)
+pip install content-core
 # Or use directly with uvx (no installation required)
-uvx --from "content-core[mcp]" content-core-mcp
+uvx --from "content-core" content-core-mcp
 ```
 Add to your `claude_desktop_config.json`:
@@ -283,7 +282,7 @@ Add to your `claude_desktop_config.json`:
       "command": "uvx",
       "args": [
         "--from",
-        "content-core[mcp]",
+        "content-core",
         "content-core-mcp"
       ]
     }

{content_core-1.2.3 → content_core-1.3.1}/README.md RENAMED Viewed

@@ -80,11 +80,11 @@ pip install content-core
 # With enhanced document processing (adds Docling)
 pip install content-core[docling]
-# With MCP server support
-pip install content-core[mcp]
+# With MCP server support (now included by default)
+pip install content-core
-# Full installation
-pip install content-core[docling,mcp]
+# Full installation (with enhanced document processing)
+pip install content-core[docling]
 ```
 Alternatively, if you’re developing locally:
@@ -230,11 +230,11 @@ Content Core includes a Model Context Protocol (MCP) server that enables seamles
 ### Quick Setup with Claude Desktop
 ```bash
-# Install with MCP support
-pip install content-core[mcp]
+# Install Content Core (MCP server included)
+pip install content-core
 # Or use directly with uvx (no installation required)
-uvx --from "content-core[mcp]" content-core-mcp
+uvx --from "content-core" content-core-mcp
 ```
 Add to your `claude_desktop_config.json`:
@@ -245,7 +245,7 @@ Add to your `claude_desktop_config.json`:
       "command": "uvx",
       "args": [
         "--from",
-        "content-core[mcp]",
+        "content-core",
         "content-core-mcp"
       ]
     }

{content_core-1.2.3 → content_core-1.3.1}/docs/mcp.md RENAMED Viewed

@@ -20,8 +20,8 @@ The [Model Context Protocol (MCP)](https://modelcontextprotocol.io/) is an open
 ### Option 1: Install with pip (Recommended for local development)
 ```bash
-# Install Content Core with MCP support
-pip install content-core[mcp]
+# Install Content Core (MCP server included by default)
+pip install content-core
 # The content-core-mcp command becomes available
 content-core-mcp
@@ -31,7 +31,7 @@ content-core-mcp
 ```bash
 # Run MCP server directly without installation
-uvx --from "content-core[mcp]" content-core-mcp
+uvx --from "content-core" content-core-mcp
 # Also works for CLI tools
 uvx --from "content-core" ccore https://example.com
@@ -58,7 +58,7 @@ Add Content Core to your Claude Desktop configuration file:
       "command": "uvx",
       "args": [
         "--from",
-        "content-core[mcp]",
+        "content-core",
         "content-core-mcp"
       ]
     }
@@ -102,7 +102,7 @@ For optimal functionality, you'll need to configure API keys. Here's what each k
       "command": "uvx",
       "args": [
         "--from",
-        "content-core[mcp]",
+        "content-core",
         "content-core-mcp"
       ],
       "env": {
@@ -342,13 +342,13 @@ export PROMPT_PATH="/path/to/your/custom/prompts"
 content-core-mcp
 # Or with uvx
-uvx --from "content-core[mcp]" content-core-mcp
+uvx --from "content-core" content-core-mcp
 ```
 **Missing dependencies:**
 ```bash
-# Reinstall with MCP dependencies
-pip install --force-reinstall content-core[mcp]
+# Reinstall Content Core
+pip install --force-reinstall content-core
 ```
 **Audio/video extraction failing:**

content_core-1.3.1/examples/main.py ADDED Viewed

@@ -0,0 +1,241 @@
+#!/usr/bin/env python3
+"""
+MarkDowny - Convert files and URLs to Markdown using Microsoft's MarkItDown library.
+This script processes all files in the input_content/ directory and URLs from urls.txt,
+converting them to Markdown format and saving the results to separate files.
+"""
+import os
+import sys
+from pathlib import Path
+from typing import List, Tuple
+from urllib.parse import urlparse
+from loguru import logger
+from markitdown import MarkItDown
+def setup_logging() -> None:
+    """Configure logging with loguru."""
+    logger.remove()
+    logger.add(
+        sys.stderr,
+        format="<green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{level: <8}</level> | <cyan>{name}</cyan>:<cyan>{function}</cyan>:<cyan>{line}</cyan> - <level>{message}</level>",
+        level="INFO"
+    )
+    logger.add(
+        "processing.log",
+        format="{time:YYYY-MM-DD HH:mm:ss} | {level: <8} | {name}:{function}:{line} - {message}",
+        level="DEBUG",
+        rotation="10 MB"
+    )
+def create_output_directory(output_dir: Path) -> None:
+    """Create output directory if it doesn't exist."""
+    output_dir.mkdir(exist_ok=True)
+    logger.info(f"Output directory: {output_dir}")
+def sanitize_filename(filename: str) -> str:
+    """Sanitize filename for safe file system usage."""
+    # Remove or replace problematic characters
+    invalid_chars = '<>:"/\\|?*'
+    for char in invalid_chars:
+        filename = filename.replace(char, '_')
+    return filename.strip()
+def process_file(md_converter: MarkItDown, file_path: Path, output_dir: Path) -> bool:
+    """
+    Process a single file and convert it to Markdown.
+    Args:
+        md_converter: MarkItDown instance
+        file_path: Path to the input file
+        output_dir: Directory to save the output
+    Returns:
+        bool: True if successful, False otherwise
+    """
+    try:
+        logger.info(f"Processing file: {file_path.name}")
+        # Convert file to markdown
+        result = md_converter.convert(str(file_path))
+        # Create output filename
+        base_name = file_path.name  # Use full filename with extension
+        safe_name = sanitize_filename(base_name.replace('.', '_'))
+        output_filename = f"{safe_name}_converted.md"
+        output_path = output_dir / output_filename
+        # Create markdown content with metadata
+        content = f"""# Converted from: {file_path.name}
+**Source File:** {file_path.name}
+**Source Path:** {file_path}
+**Conversion Date:** {result.title if hasattr(result, 'title') else 'N/A'}
+**File Size:** {file_path.stat().st_size} bytes
+---
+{result.text_content}
+"""
+        # Write to output file
+        with open(output_path, 'w', encoding='utf-8') as f:
+            f.write(content)
+        logger.success(f"Successfully converted {file_path.name} -> {output_filename}")
+        return True
+    except Exception as e:
+        logger.error(f"Failed to process {file_path.name}: {str(e)}")
+        return False
+def process_url(md_converter: MarkItDown, url: str, output_dir: Path, index: int) -> bool:
+    """
+    Process a single URL and convert it to Markdown.
+    Args:
+        md_converter: MarkItDown instance
+        url: URL to process
+        output_dir: Directory to save the output
+        index: Index for naming the output file
+    Returns:
+        bool: True if successful, False otherwise
+    """
+    try:
+        logger.info(f"Processing URL: {url}")
+        # Convert URL to markdown
+        result = md_converter.convert(url)
+        # Create output filename based on URL
+        parsed_url = urlparse(url)
+        domain = parsed_url.netloc.replace('www.', '')
+        safe_domain = sanitize_filename(domain)
+        output_filename = f"url_{index:02d}_{safe_domain}_converted.md"
+        output_path = output_dir / output_filename
+        # Create markdown content with metadata
+        content = f"""# Converted from URL: {url}
+**Source URL:** {url}
+**Domain:** {parsed_url.netloc}
+**Conversion Date:** {result.title if hasattr(result, 'title') else 'N/A'}
+---
+{result.text_content}
+"""
+        # Write to output file
+        with open(output_path, 'w', encoding='utf-8') as f:
+            f.write(content)
+        logger.success(f"Successfully converted {url} -> {output_filename}")
+        return True
+    except Exception as e:
+        logger.error(f"Failed to process URL {url}: {str(e)}")
+        return False
+def load_urls(urls_file: Path) -> List[str]:
+    """Load URLs from the urls.txt file."""
+    try:
+        with open(urls_file, 'r', encoding='utf-8') as f:
+            urls = [line.strip() for line in f if line.strip() and not line.strip().startswith('#')]
+        logger.info(f"Loaded {len(urls)} URLs from {urls_file}")
+        return urls
+    except Exception as e:
+        logger.error(f"Failed to load URLs from {urls_file}: {str(e)}")
+        return []
+def get_input_files(input_dir: Path) -> List[Path]:
+    """Get all files from the input directory, excluding audio/video files."""
+    try:
+        # Skip audio/video files for now
+        skip_extensions = [] #{'.mp3', '.mp4', '.wav', '.avi', '.mov', '.mkv', '.m4a'}
+        files = [f for f in input_dir.iterdir()
+                if f.is_file() and f.suffix.lower() not in skip_extensions]
+        logger.info(f"Found {len(files)} files in {input_dir} (excluding audio/video)")
+        return files
+    except Exception as e:
+        logger.error(f"Failed to read files from {input_dir}: {str(e)}")
+        return []
+def main():
+    """Main function to orchestrate the conversion process."""
+    setup_logging()
+    logger.info("Starting MarkDowny processing...")
+    # Setup paths
+    project_root = Path(__file__).parent
+    input_dir = project_root / "input_content"
+    urls_file = project_root / "urls.txt"
+    output_dir = project_root / "output"
+    # Create output directory
+    create_output_directory(output_dir)
+    # Initialize MarkItDown
+    md_converter = MarkItDown()
+    # Process files
+    files_processed = 0
+    files_failed = 0
+    if input_dir.exists():
+        input_files = get_input_files(input_dir)
+        logger.info(f"Processing {len(input_files)} files...")
+        for file_path in input_files:
+            if process_file(md_converter, file_path, output_dir):
+                files_processed += 1
+            else:
+                files_failed += 1
+    else:
+        logger.warning(f"Input directory {input_dir} does not exist")
+    # Process URLs
+    urls_processed = 0
+    urls_failed = 0
+    if urls_file.exists():
+        urls = load_urls(urls_file)
+        logger.info(f"Processing {len(urls)} URLs...")
+        for index, url in enumerate(urls, 1):
+            if process_url(md_converter, url, output_dir, index):
+                urls_processed += 1
+            else:
+                urls_failed += 1
+    else:
+        logger.warning(f"URLs file {urls_file} does not exist")
+    # Summary
+    logger.info("=" * 50)
+    logger.info("PROCESSING SUMMARY")
+    logger.info("=" * 50)
+    logger.info(f"Files: {files_processed} successful, {files_failed} failed")
+    logger.info(f"URLs: {urls_processed} successful, {urls_failed} failed")
+    logger.info(f"Total: {files_processed + urls_processed} successful, {files_failed + urls_failed} failed")
+    logger.info(f"Output directory: {output_dir}")
+    if files_failed + urls_failed > 0:
+        logger.warning("Some items failed to process. Check the logs for details.")
+        sys.exit(1)
+    else:
+        logger.success("All items processed successfully!")
+if __name__ == "__main__":
+    main()

{content_core-1.2.3 → content_core-1.3.1}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [project]
 name = "content-core"
-version = "1.2.3"
+version = "1.3.1"
 description = "Extract what matters from any media source. Available as Python Library, macOS Service, CLI and MCP Server"
 readme = "README.md"
 homepage = "https://github.com/lfnovo/content-core"
@@ -34,10 +34,10 @@ dependencies = [
     "asciidoc>=10.2.1",
     "python-magic-bin==0.4.14; sys_platform == 'win32'",
     "pytubefix>=9.1.1",
+    "fastmcp>=2.10.0",
 ]
 [project.optional-dependencies]
-mcp = ["fastmcp>=0.5.0"]
 docling = ["docling>=2.34.0"]
 [project.scripts]

content-core 1.2.3__tar.gz → 1.3.1__tar.gz

Potentially problematic release.

content-core 1.2.3 (tar.gz) → 1.3.1 (tar.gz)