PyPI - content-core - Versions diffs - 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl - Mend

content-core 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of content-core might be problematic. Click here for more details.

Files changed (9) hide show

content_core/config.py CHANGED Viewed

@@ -1,6 +1,5 @@
 import os
 import pkgutil
-import os  # needed for load_config env/path checks
 import yaml
 from dotenv import load_dotenv

content_core/mcp/__init__.py ADDED Viewed

@@ -0,0 +1,5 @@
+"""Content Core MCP Server module."""
+from .server import mcp, main
+__all__ = ["mcp", "main"]

content_core/mcp/server.py ADDED Viewed

@@ -0,0 +1,211 @@
+"""Content Core MCP Server implementation."""
+import os
+import sys
+from contextlib import contextmanager
+from datetime import datetime
+from io import StringIO
+from pathlib import Path
+from typing import Any, Dict, Optional
+from fastmcp import FastMCP
+from loguru import logger
+# Ask MoviePy's imageio/ffmpeg backends to log errors only, so they do not write into the MCP stdio stream. NOTE(review): confirm both variables are actually honored by the installed versions.
+os.environ["IMAGEIO_LOG_LEVEL"] = "error"
+os.environ["FFMPEG_LOG_LEVEL"] = "error"
+# Keep loguru off stdout (which would interfere with MCP). NOTE(review): loguru's default handler presumably already targets stderr, in which case the practical effect is pinning the level to INFO - confirm.
+logger.remove()  # Remove default handler
+logger.add(sys.stderr, level="INFO")  # Add stderr handler only
+@contextmanager
+def suppress_stdout():
+    """Context manager to suppress stdout during operations that might print."""
+    original_stdout = sys.stdout  # remember the current stream so it can be put back
+    sys.stdout = StringIO()  # writes through sys.stdout now land in a throwaway in-memory buffer
+    try:
+        yield  # NOTE: only the process-global sys.stdout name is rebound; output written directly to file descriptor 1 (e.g. by subprocesses) is not captured
+    finally:
+        sys.stdout = original_stdout  # restore even when the wrapped body raises
+# Prepend the directory three levels above this file (the one that contains the content_core package) to sys.path so the import below resolves when this file is run directly.
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(__file__))))  # NOTE(review): for an installed wheel this puts the install directory first on sys.path - presumably unnecessary there; confirm
+import content_core as cc
+# Module-level MCP server instance; tools are registered on it via the @mcp.tool decorator
+mcp = FastMCP("Content Core MCP Server")
+async def _extract_content_impl(
+    url: Optional[str] = None,
+    file_path: Optional[str] = None
+) -> Dict[str, Any]:
+    """
+    Extract content from a URL or file using Content Core's auto engine.
+    Args:
+        url: Optional URL to extract content from
+        file_path: Optional file path to extract content from
+    Returns:
+        JSON object containing extracted content and metadata
+    Raises:
+        ValueError: If neither or both url and file_path are provided
+    """
+    # Validate input - exactly one must be provided
+    if (url is None and file_path is None) or (url is not None and file_path is not None):
+        return {
+            "success": False,
+            "error": "Exactly one of 'url' or 'file_path' must be provided",
+            "source_type": None,
+            "source": None,
+            "content": None,
+            "metadata": None
+        }
+    # Determine source type and validate
+    source_type = "url" if url else "file"
+    source = url if url else file_path
+    # Additional validation for file paths
+    if file_path:
+        path = Path(file_path)
+        if not path.exists():
+            return {
+                "success": False,
+                "error": f"File not found: {file_path}",
+                "source_type": source_type,
+                "source": source,
+                "content": None,
+                "metadata": None
+            }
+        # Security check - ensure no directory traversal
+        try:
+            # Resolve to absolute path and ensure it's not trying to access sensitive areas
+            path.resolve()
+            # You might want to add additional checks here based on your security requirements
+        except Exception as e:
+            return {
+                "success": False,
+                "error": f"Invalid file path: {str(e)}",
+                "source_type": source_type,
+                "source": source,
+                "content": None,
+                "metadata": None
+            }
+    # Build extraction request
+    extraction_request = {}
+    if url:
+        extraction_request["url"] = url
+    else:
+        extraction_request["file_path"] = str(Path(file_path).resolve())
+    # Track start time
+    start_time = datetime.utcnow()
+    try:
+        # Use Content Core's extract_content with auto engine
+        logger.info(f"Extracting content from {source_type}: {source}")
+        # Suppress stdout to prevent MoviePy and other libraries from interfering with MCP protocol
+        with suppress_stdout():
+            result = await cc.extract_content(extraction_request)
+        # Calculate extraction time
+        extraction_time = (datetime.utcnow() - start_time).total_seconds()
+        # Build response - result is a ProcessSourceOutput object
+        response = {
+            "success": True,
+            "error": None,
+            "source_type": source_type,
+            "source": source,
+            "content": result.content or "",
+            "metadata": {
+                "extraction_time_seconds": extraction_time,
+                "extraction_timestamp": start_time.isoformat() + "Z",
+                "content_length": len(result.content or ""),
+                "identified_type": result.identified_type or "unknown",
+                "identified_provider": result.identified_provider or "",
+            }
+        }
+        # Add metadata from the result
+        if result.metadata:
+            response["metadata"].update(result.metadata)
+        # Add specific metadata based on source type
+        if source_type == "url":
+            if result.title:
+                response["metadata"]["title"] = result.title
+            if result.url:
+                response["metadata"]["final_url"] = result.url
+        elif source_type == "file":
+            if result.title:
+                response["metadata"]["title"] = result.title
+            if result.file_path:
+                response["metadata"]["file_path"] = result.file_path
+            response["metadata"]["file_size"] = Path(file_path).stat().st_size
+            response["metadata"]["file_extension"] = Path(file_path).suffix
+        logger.info(f"Successfully extracted content from {source_type}: {source}")
+        return response
+    except Exception as e:
+        logger.error(f"Error extracting content from {source_type} {source}: {str(e)}")
+        return {
+            "success": False,
+            "error": str(e),
+            "source_type": source_type,
+            "source": source,
+            "content": None,
+            "metadata": {
+                "extraction_timestamp": start_time.isoformat() + "Z",
+                "error_type": type(e).__name__
+            }
+        }
+@mcp.tool  # registers this coroutine as a tool on the module-level FastMCP server
+async def extract_content(
+    url: Optional[str] = None,
+    file_path: Optional[str] = None
+) -> Dict[str, Any]:
+    """
+    Extract content from a URL or file using Content Core's auto engine.
+    Args:
+        url: Optional URL to extract content from
+        file_path: Optional file path to extract content from
+    Returns:
+        JSON object containing extracted content and metadata
+    Raises:
+        ValueError: If neither or both url and file_path are provided
+    """  # NOTE(review): the "Raises" section above does not match the code - _extract_content_impl returns a dict with "success": False for that case instead of raising ValueError
+    return await _extract_content_impl(url=url, file_path=file_path)  # thin wrapper: all logic lives in the undecorated implementation
+def main():
+    """Entry point for the MCP server."""
+    # Best-effort MoviePy warm-up so that it does not emit output later, while the server is running
+    try:
+        import moviepy.config as mp_config  # imported lazily so a missing MoviePy does not break startup
+        mp_config.check_and_download_cmd("ffmpeg")  # Pre-download to avoid logs later. NOTE(review): confirm this helper exists in the MoviePy version in use
+    except Exception:
+        pass  # Deliberately ignored: covers MoviePy not being installed/configured and any failure of the call above
+    logger.info("Starting Content Core MCP Server")
+    # Run the server with no explicit transport argument; presumably FastMCP defaults to STDIO here - confirm against the installed fastmcp version
+    mcp.run()
+if __name__ == "__main__":
+    main()

content_core/models.py CHANGED Viewed

@@ -1,5 +1,4 @@
 from esperanto import AIFactory
-from esperanto.providers.stt import SpeechToTextModel
 from .config import CONFIG
 class ModelFactory:

{content_core-1.0.4.dist-info → content_core-1.1.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: content-core
-Version: 1.0.4
+Version: 1.1.0
 Summary: Extract what matters from any media source
 Author-email: LUIS NOVO <lfnovo@gmail.com>
 License-File: LICENSE
@@ -31,6 +31,8 @@ Requires-Dist: pytubefix>=9.1.1
 Requires-Dist: readability-lxml>=0.8.4.1
 Requires-Dist: validators>=0.34.0
 Requires-Dist: youtube-transcript-api>=1.0.3
+Provides-Extra: mcp
+Requires-Dist: fastmcp>=0.5.0; extra == 'mcp'
 Description-Content-Type: text/markdown
 # Content Core
@@ -57,6 +59,7 @@ The primary goal of Content Core is to simplify the process of ingesting content
     * For files: Tries Docling extraction first (for robust document parsing), then falls back to simple extraction if needed.
     * You can override this by specifying an engine, but `'auto'` is recommended for most users.
 *   **Content Cleaning (Optional):** Likely integrates with LLMs (via `prompter.py` and Jinja templates) to refine and clean the extracted content.
+*   **MCP Server:** Includes a Model Context Protocol (MCP) server for seamless integration with Claude Desktop and other MCP-compatible applications.
 *   **Asynchronous:** Built with `asyncio` for efficient I/O operations.
 ## Getting Started
@@ -66,8 +69,11 @@ The primary goal of Content Core is to simplify the process of ingesting content
 Install Content Core using `pip`:
 ```bash
-# Install the package (without Docling)
+# Install the package
 pip install content-core
+# Install with MCP server support
+pip install content-core[mcp]
 ```
 Alternatively, if you’re developing locally:
@@ -194,6 +200,38 @@ summary = await cc.summarize_content("long article text", context="explain to a
 For more information on how to use the Content Core library, including details on AI model configuration and customization, refer to our [Usage Documentation](docs/usage.md).
+## MCP Server Integration
+Content Core includes a Model Context Protocol (MCP) server that enables seamless integration with Claude Desktop and other MCP-compatible applications. The MCP server exposes Content Core's powerful extraction capabilities through a standardized protocol.
+### Quick Setup with Claude Desktop
+```bash
+# Install with MCP support
+pip install content-core[mcp]
+# Or use directly with uvx (no installation required)
+uvx --from "content-core[mcp]" content-core-mcp
+```
+Add to your `claude_desktop_config.json`:
+```json
+{
+  "mcpServers": {
+    "content-core": {
+      "command": "uvx",
+      "args": [
+        "--from",
+        "content-core[mcp]",
+        "content-core-mcp"
+      ]
+    }
+  }
+}
+```
+For detailed setup instructions, configuration options, and usage examples, see our [MCP Documentation](docs/mcp.md).
 ## Using with Langchain
 For users integrating with the [Langchain](https://python.langchain.com/) framework, `content-core` exposes a set of compatible tools. These tools, located in the `src/content_core/tools` directory, allow you to leverage `content-core` extraction, cleaning, and summarization capabilities directly within your Langchain agents and chains.

{content_core-1.0.4.dist-info → content_core-1.1.0.dist-info}/RECORD RENAMED Viewed

@@ -1,8 +1,8 @@
 content_core/__init__.py,sha256=t4xFo9f3uB2FD1tdR-7ruhMW9_ciJawQReK6iFXWfR0,6531
 content_core/cc_config.yaml,sha256=gGSPM-oO6GIHyCfDCH-cN72BgPJiRmZMgwPrrLhUmfU,851
-content_core/config.py,sha256=vbRgJy8lOTZABeY7GZc7MglNYwBQYpUNzu76kprv_c0,1854
+content_core/config.py,sha256=vyx0fioR6r0mcZfVdwAFDhFrRNoG0ZNG8RNxIDnhNlo,1802
 content_core/logging.py,sha256=oeRdWKknEolptopxF1IvnEGEc0ZUw45QXYUEZ71GcdY,438
-content_core/models.py,sha256=FBV_tV6cmI0F82WfcA6xHag-YMsxI1dIbDGWG-3Eq_Y,935
+content_core/models.py,sha256=Kt6tWdAX87eQ2tL6eTwcHU7_NIRnN4exP4RzV2WrMig,881
 content_core/models_config.yaml,sha256=Yr-GS94ffxnkaWojUfpErUMM7m_MShsYjR6QuDjMzwo,444
 content_core/py.typed,sha256=pLuU3XTTeVpXo4UomOjcvAIQqOrzIotlWlJ3KFo2lxQ,154
 content_core/templated_message.py,sha256=KbI2rcvgGM5oRIcsG68zAZfgNsC97fR16D61683ZSnY,1617
@@ -19,6 +19,8 @@ content_core/content/extraction/graph.py,sha256=Nn2iaQc6YJ4Qt8WKTolwUQUNNqUlwpV8
 content_core/content/identification/__init__.py,sha256=x4n8JIjDwmPvAopEEEcmZjlozg-zGbMq_s9VYdBjzYU,169
 content_core/content/summary/__init__.py,sha256=ReKCZWKfDtqlInKeh87Y1DEfiNzVWabGybEz3hS2FrI,114
 content_core/content/summary/core.py,sha256=kEabpETljzUb-yf0NcVWTOuCtayESo74gGBVDX7YTFs,550
+content_core/mcp/__init__.py,sha256=KNZYH4F9AoW1Orw1BtO3n92Cn-127hI7iF9gnGadueU,95
+content_core/mcp/server.py,sha256=m2A63Qle3nJ_Lw46uWkwVvYERtEw84hd7NHAn1rwdAQ,6968
 content_core/notebooks/run.ipynb,sha256=WPBNcQUNXR5MldNMghVcU4vE4ibrVmlANa80baQn8TA,371078
 content_core/processors/audio.py,sha256=Mie20g_2Akhw6BHBVo3sHMpDRYUkqBI72lEDakscx3s,5729
 content_core/processors/docling.py,sha256=dkXehsQdfyWXfrK1K_6Pye50ABM7DxMk6TMguabM9Pc,2151
@@ -32,8 +34,8 @@ content_core/tools/__init__.py,sha256=DuJmd7fE-NpDvLP8IW1XY5MUkAQcdks52rn2jk4N8j
 content_core/tools/cleanup.py,sha256=5IdKedsFyRQMdYzgFSKtsfyxJldbroXQXHesHICNENI,523
 content_core/tools/extract.py,sha256=-r2_jsuMMXyXxGVqWhh1ilNPo_UMYAbw3Pkp1FzPy5g,577
 content_core/tools/summarize.py,sha256=DPfeglLWB08q8SvHrsKpOKZ35XjduUDs2J02ISwjdj0,596
-content_core-1.0.4.dist-info/METADATA,sha256=SdXexgOV0tc4ArCYWjxrZog4esHJxW0zh8pdnZFqLi8,11908
-content_core-1.0.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-content_core-1.0.4.dist-info/entry_points.txt,sha256=9fGQUk6bxBVXj9PRwfWVPn54ClSEJV7J-KBLXtjOhQw,99
-content_core-1.0.4.dist-info/licenses/LICENSE,sha256=myj0z2T4qIkenCgLsRfx7Wk6UqCQNj5c7O14Qx4zpGg,1066
-content_core-1.0.4.dist-info/RECORD,,
+content_core-1.1.0.dist-info/METADATA,sha256=9-ppXQ7o-s8BCb2lH5xBiaiYBHmOFmXFrWntHuo9G_o,13017
+content_core-1.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+content_core-1.1.0.dist-info/entry_points.txt,sha256=ifbBxw37b7gAxZXoduS15KtqHuMHuU58STRkEmgM2zA,147
+content_core-1.1.0.dist-info/licenses/LICENSE,sha256=myj0z2T4qIkenCgLsRfx7Wk6UqCQNj5c7O14Qx4zpGg,1066
+content_core-1.1.0.dist-info/RECORD,,

{content_core-1.0.4.dist-info → content_core-1.1.0.dist-info}/entry_points.txt RENAMED Viewed

@@ -1,4 +1,5 @@
 [console_scripts]
 cclean = content_core:cclean
 ccore = content_core:ccore
+content-core-mcp = content_core.mcp.server:main
 csum = content_core:csum

{content_core-1.0.4.dist-info → content_core-1.1.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{content_core-1.0.4.dist-info → content_core-1.1.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

content-core 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl

Potentially problematic release.

content-core 1.0.4__py3-none-any.whl → 1.1.0__py3-none-any.whl