PyPI - content-core - Versions diffs - 1.4.2__py3-none-any.whl → 1.6.0__py3-none-any.whl - Mend

content-core 1.4.2__py3-none-any.whl → 1.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of content-core might be problematic. Click here for more details.

Files changed (14)

content_core/__init__.py +0 -2
content_core/cc_config.yaml +2 -0
content_core/config.py +71 -0
content_core/content/identification/file_detector.py +103 -13
content_core/content/summary/core.py +1 -1
content_core/notebooks/run.ipynb +0 -2
content_core/processors/audio.py +114 -47
content_core/processors/url.py +3 -3
content_core/templated_message.py +2 -2
{content_core-1.4.2.dist-info → content_core-1.6.0.dist-info}/METADATA +15 -1
{content_core-1.4.2.dist-info → content_core-1.6.0.dist-info}/RECORD +14 -14
{content_core-1.4.2.dist-info → content_core-1.6.0.dist-info}/WHEEL +0 -0
{content_core-1.4.2.dist-info → content_core-1.6.0.dist-info}/entry_points.txt +0 -0
{content_core-1.4.2.dist-info → content_core-1.6.0.dist-info}/licenses/LICENSE +0 -0

content_core/__init__.py CHANGED Viewed

@@ -214,5 +214,3 @@ def csum():
 if __name__ == "__main__":
     ccore()
-if __name__ == "__main__":
-    ccore()

content_core/cc_config.yaml CHANGED Viewed

@@ -32,6 +32,8 @@ summary_model:
 extraction:
   document_engine: auto  # auto | simple | docling - for files/documents
   url_engine: auto  # auto | simple | firecrawl | jina | docling - for URLs
+  audio:
+    concurrency: 3              # Number of concurrent audio transcriptions (1-10)
   docling:
     output_format: markdown  # markdown | html | json
   pymupdf:

content_core/config.py CHANGED Viewed

@@ -70,6 +70,61 @@ def get_url_engine():
         return env_engine
     return CONFIG.get("extraction", {}).get("url_engine", "auto")
+def get_audio_concurrency():
+    """
+    Get audio concurrency with environment variable override and validation.
+    Returns the configured number of concurrent audio transcriptions, with automatic
+    validation and fallback to safe defaults.
+    Configuration priority (highest to lowest):
+    1. CCORE_AUDIO_CONCURRENCY environment variable
+    2. extraction.audio.concurrency in YAML config
+    3. Default value: 3
+    Returns:
+        int: Number of concurrent transcriptions (1-10)
+    Validation:
+        - Values must be integers between 1 and 10 (inclusive)
+        - Invalid values (out of range, non-integer, etc.) automatically fall back to default
+        - A warning is logged when invalid values are detected
+    Examples:
+        >>> import os
+        >>> os.environ["CCORE_AUDIO_CONCURRENCY"] = "5"
+        >>> get_audio_concurrency()
+        5
+        >>> os.environ["CCORE_AUDIO_CONCURRENCY"] = "20"  # Too high
+        >>> get_audio_concurrency()  # Falls back to default
+        3
+    """
+    env_concurrency = os.environ.get("CCORE_AUDIO_CONCURRENCY")
+    if env_concurrency:
+        try:
+            concurrency = int(env_concurrency)
+            if concurrency < 1 or concurrency > 10:
+                # Import logger here to avoid circular imports
+                from content_core.logging import logger
+                logger.warning(
+                    f"Invalid CCORE_AUDIO_CONCURRENCY: '{env_concurrency}'. "
+                    f"Must be between 1 and 10. "
+                    f"Using default from config."
+                )
+                return CONFIG.get("extraction", {}).get("audio", {}).get("concurrency", 3)
+            return concurrency
+        except ValueError:
+            # Import logger here to avoid circular imports
+            from content_core.logging import logger
+            logger.warning(
+                f"Invalid CCORE_AUDIO_CONCURRENCY: '{env_concurrency}'. "
+                f"Must be a valid integer. "
+                f"Using default from config."
+            )
+            return CONFIG.get("extraction", {}).get("audio", {}).get("concurrency", 3)
+    return CONFIG.get("extraction", {}).get("audio", {}).get("concurrency", 3)
 # Programmatic config overrides: use in notebooks or scripts
 def set_document_engine(engine: str):
     """Override the document extraction engine ('auto', 'simple', or 'docling')."""
@@ -102,3 +157,19 @@ def set_pymupdf_ocr_fallback(enabled: bool):
     extraction = CONFIG.setdefault("extraction", {})
     pymupdf_cfg = extraction.setdefault("pymupdf", {})
     pymupdf_cfg["ocr_fallback"] = enabled
+def set_audio_concurrency(concurrency: int):
+    """
+    Override the audio concurrency setting (1-10).
+    Args:
+        concurrency (int): Number of concurrent audio transcriptions (1-10)
+    Raises:
+        ValueError: If concurrency is not between 1 and 10
+    """
+    if not isinstance(concurrency, int) or concurrency < 1 or concurrency > 10:
+        raise ValueError(f"Audio concurrency must be an integer between 1 and 10, got: {concurrency}")
+    extraction = CONFIG.setdefault("extraction", {})
+    audio_cfg = extraction.setdefault("audio", {})
+    audio_cfg["concurrency"] = concurrency

content_core/content/identification/file_detector.py CHANGED Viewed

@@ -3,10 +3,9 @@ Pure Python file type detection using magic bytes and content analysis.
 Replaces libmagic dependency with a lightweight implementation.
 """
-import os
 import zipfile
 from pathlib import Path
-from typing import Dict, Optional, Tuple
+from typing import Dict, Optional
 from content_core.common.exceptions import UnsupportedTypeException
 from content_core.logging import logger
@@ -14,10 +13,17 @@ from content_core.logging import logger
 class FileDetector:
     """Pure Python file type detection using magic bytes and content analysis."""
-    # Configuration constants
+    # Configuration constants for binary/text detection
     SIGNATURE_READ_SIZE = 512  # Bytes to read for binary signature detection
     TEXT_READ_SIZE = 1024      # Bytes to read for text content analysis
+    # Configuration constants for CSV detection
+    CSV_MAX_FIELD_LENGTH = 100  # Maximum average field length for CSV (longer suggests prose)
+    CSV_MAX_VARIANCE = 500      # Maximum variance in field lengths (higher suggests natural text)
+    CSV_MIN_SCORE = 2           # Minimum score required to classify as CSV
+    CSV_MIN_FIELDS = 2          # Minimum number of fields required for CSV
+    CSV_MAX_HEADER_FIELD_LENGTH = 50  # Maximum length for individual header fields
     def __init__(self):
         """Initialize the FileDetector with signature mappings."""
@@ -365,18 +371,102 @@ class FileDetector:
     def _looks_like_csv(self, content: str) -> bool:
-        """Check if content looks like CSV format."""
-        lines = content.split('\n', 5)[:5]  # Check first 5 lines
-        if len(lines) < 2:
+        """
+        Check if content looks like CSV format with improved heuristics.
+        Uses a multi-stage approach with performance optimization:
+        1. Basic structural checks (cheap)
+        2. Field length analysis (cheap, early exit)
+        3. Pattern matching (moderate cost)
+        4. Variance analysis (expensive, only if needed)
+        """
+        lines = content.split('\n', 10)[:10]  # Check first 10 lines for better accuracy
+        non_empty_lines = [line for line in lines if line.strip()]
+        # Stage 1: Basic structural checks
+        if len(non_empty_lines) < 2:
             return False
         # Count commas in each line
-        comma_counts = [line.count(',') for line in lines if line.strip()]
-        if not comma_counts:
+        comma_counts = [line.count(',') for line in non_empty_lines]
+        # Must have at least one comma per line
+        if not all(count > 0 for count in comma_counts):
             return False
-        # CSV should have consistent comma counts
-        return len(set(comma_counts)) == 1 and comma_counts[0] > 0
+        # CSV should have consistent comma counts across lines
+        if len(set(comma_counts)) != 1:
+            return False
+        num_fields = comma_counts[0] + 1  # Number of fields = commas + 1
+        # Must have minimum number of fields to be CSV
+        if num_fields < self.CSV_MIN_FIELDS:
+            return False
+        # Stage 2: Field length analysis (PERFORMANCE OPTIMIZATION: early exit)
+        first_line = non_empty_lines[0]
+        fields = first_line.split(',')
+        # CSV fields should be relatively short (not long sentences)
+        # Average field length should be reasonable (not paragraphs)
+        # Early exit avoids expensive variance calculations for obvious prose
+        avg_field_length = sum(len(f.strip()) for f in fields) / len(fields)
+        if avg_field_length > self.CSV_MAX_FIELD_LENGTH:
+            return False  # Too long to be typical CSV fields - exit early
+        # Stage 3: Pattern matching
+        # Check for CSV-like patterns:
+        # 1. Fields that look like headers (short, alphanumeric)
+        # 2. Quoted fields (common in CSV)
+        # 3. Numeric fields
+        has_quoted_fields = any('"' in line or "'" in line for line in non_empty_lines[:3])
+        first_line_fields = [f.strip() for f in fields]
+        # Check if first line looks like a header (short, no sentence-ending punctuation)
+        looks_like_header = all(
+            len(f) < self.CSV_MAX_HEADER_FIELD_LENGTH and not f.endswith('.') and not f.endswith('!')
+            for f in first_line_fields
+        )
+        # Stage 4: Variance analysis (EXPENSIVE - only if we have enough data)
+        # Check if subsequent lines have similar field structure
+        # Real CSV tends to have consistent field lengths
+        if len(non_empty_lines) >= 3:
+            field_lengths_per_line = []
+            for line in non_empty_lines[:5]:
+                line_fields = line.split(',')
+                field_lengths = [len(f.strip()) for f in line_fields]
+                field_lengths_per_line.append(field_lengths)
+            # Calculate variance in field positions
+            # CSV data should have relatively consistent field lengths at each position
+            # Natural text with commas will have much more variance
+            position_variances = []
+            for i in range(num_fields):
+                lengths_at_position = [fl[i] if i < len(fl) else 0 for fl in field_lengths_per_line]
+                if lengths_at_position:
+                    avg = sum(lengths_at_position) / len(lengths_at_position)
+                    variance = sum((x - avg) ** 2 for x in lengths_at_position) / len(lengths_at_position)
+                    position_variances.append(variance)
+            # High variance suggests natural text, not structured CSV
+            if position_variances:
+                avg_variance = sum(position_variances) / len(position_variances)
+                if avg_variance > self.CSV_MAX_VARIANCE:
+                    return False  # Very high variance = likely prose
+        # Scoring: Require at least some CSV-like characteristics
+        csv_score = 0
+        if looks_like_header:
+            csv_score += 1
+        if has_quoted_fields:
+            csv_score += 1
+        if num_fields >= 3:  # Multiple fields is more CSV-like
+            csv_score += 1
+        # Need minimum score to confidently classify as CSV
+        return csv_score >= self.CSV_MIN_SCORE
     def _is_text_file(self, content: str) -> bool:

content_core/content/summary/core.py CHANGED Viewed

@@ -8,7 +8,7 @@ async def summarize(content: str, context: str) -> str:
     templated_message_fn = partial(templated_message, model=ModelFactory.get_model('summary_model'))
     response = await templated_message_fn(
         TemplatedMessageInput(
-            user_prompt_template="prompts/content/summarize",
+            user_prompt_template="content/summarize",
             data={"content": content, "context": context},
         )
     )

content_core/notebooks/run.ipynb CHANGED Viewed

@@ -63,8 +63,6 @@
    "source": [
     "from content_core.content.extraction import extract_content\n",
     "\n",
-    "from content_core.content.cleanup import cleanup_content\n",
-    "from content_core.content.summary import summarize\n",
     "\n",
     "\n",
     "yt = await extract_content(dict(url=\"https://www.youtube.com/watch?v=lLprprtHfts\"))\n",

content_core/processors/audio.py CHANGED Viewed

@@ -8,11 +8,9 @@ from functools import partial
 from moviepy import AudioFileClip
 from content_core.common import ProcessSourceState
+from content_core.config import get_audio_concurrency
 from content_core.logging import logger
-# todo: remove reference to model_manager
-# future: parallelize the transcription process
 async def split_audio(input_file, segment_length_minutes=15, output_prefix=None):
     """
@@ -47,7 +45,7 @@ async def split_audio(input_file, segment_length_minutes=15, output_prefix=None)
             end_time = min((i + 1) * segment_length_s, audio.duration)
             # Extract segment
-            output_filename = f"{output_prefix}_{str(i+1).zfill(3)}.mp3"
+            output_filename = f"{output_prefix}_{str(i + 1).zfill(3)}.mp3"
             output_path = os.path.join(output_dir, output_filename)
             # Export segment
@@ -55,7 +53,9 @@ async def split_audio(input_file, segment_length_minutes=15, output_prefix=None)
             output_files.append(output_path)
-            logger.debug(f"Exported segment {i+1}/{total_segments}: {output_filename}")
+            logger.debug(
+                f"Exported segment {i + 1}/{total_segments}: {output_filename}"
+            )
         return output_files
@@ -98,61 +98,128 @@ def extract_audio(
         raise
-async def transcribe_audio_segment(audio_file, model):
-    """Transcribe a single audio segment asynchronously"""
-    return (await model.atranscribe(audio_file)).text
+async def transcribe_audio_segment(audio_file, model, semaphore):
+    """
+    Transcribe a single audio segment asynchronously with concurrency control.
+    This function uses a semaphore to limit the number of concurrent transcriptions,
+    preventing API rate limits while allowing parallel processing for improved performance.
-async def extract_audio_data(data: ProcessSourceState):
-    input_audio_path = data.file_path
+    Args:
+        audio_file (str): Path to the audio file segment to transcribe
+        model: Speech-to-text model instance with atranscribe() method
+        semaphore (asyncio.Semaphore): Semaphore to control concurrency
-    try:
-        # Create a temporary directory for audio segments
-        temp_dir = tempfile.mkdtemp()
-        output_prefix = os.path.splitext(os.path.basename(input_audio_path))[0]
-        output_dir = temp_dir
-        os.makedirs(output_dir, exist_ok=True)
+    Returns:
+        str: Transcribed text from the audio segment
-        # Split audio into segments if longer than 10 minutes
-        audio = AudioFileClip(input_audio_path)
-        duration_s = audio.duration
-        segment_length_s = 10 * 60  # 10 minutes in seconds
-        output_files = []
+    Note:
+        Multiple instances of this function can run concurrently, but the semaphore
+        ensures that no more than N transcriptions happen simultaneously, where N
+        is configured via get_audio_concurrency() (default: 3, range: 1-10).
+    """
+    async with semaphore:
+        return (await model.atranscribe(audio_file)).text
-        if duration_s > segment_length_s:
-            logger.info(
-                f"Audio is longer than 10 minutes ({duration_s}s), splitting into {math.ceil(duration_s / segment_length_s)} segments"
-            )
-            for i in range(math.ceil(duration_s / segment_length_s)):
-                start_time = i * segment_length_s
-                end_time = min((i + 1) * segment_length_s, audio.duration)
-                # Extract segment
-                output_filename = f"{output_prefix}_{str(i+1).zfill(3)}.mp3"
-                output_path = os.path.join(output_dir, output_filename)
+async def extract_audio_data(data: ProcessSourceState):
+    """
+    Extract and transcribe audio from a file with automatic segmentation and parallel processing.
+    This function handles the complete audio processing pipeline:
+    1. Splits long audio files (>10 minutes) into segments
+    2. Transcribes segments in parallel using configurable concurrency
+    3. Joins transcriptions in correct order
-                extract_audio(input_audio_path, output_path, start_time, end_time)
+    For files longer than 10 minutes, segments are processed concurrently with a
+    configurable concurrency limit to balance performance and API rate limits.
-                output_files.append(output_path)
-        else:
-            output_files = [input_audio_path]
+    Args:
+        data (ProcessSourceState): State object containing file_path to audio/video file
+    Returns:
+        dict: Dictionary containing:
+            - metadata: Information about processed segments count
+            - content: Complete transcribed text
-        # Transcribe audio files
-        from content_core.models import ModelFactory
+    Configuration:
+        Concurrency is controlled via:
+        - Environment variable: CCORE_AUDIO_CONCURRENCY (1-10, default: 3)
+        - YAML config: extraction.audio.concurrency
-        speech_to_text_model = ModelFactory.get_model("speech_to_text")
-        transcriptions = []
-        for audio_file in output_files:
-            transcription = await transcribe_audio_segment(
-                audio_file, speech_to_text_model
+    Raises:
+        Exception: If audio extraction or transcription fails
+    """
+    input_audio_path = data.file_path
+    audio = None
+    try:
+        # Use TemporaryDirectory context manager for automatic cleanup
+        with tempfile.TemporaryDirectory() as temp_dir:
+            output_prefix = os.path.splitext(os.path.basename(input_audio_path))[0]
+            output_dir = temp_dir
+            # Split audio into segments if longer than 10 minutes
+            audio = AudioFileClip(input_audio_path)
+            duration_s = audio.duration
+            segment_length_s = 10 * 60  # 10 minutes in seconds
+            output_files = []
+            if duration_s > segment_length_s:
+                logger.info(
+                    f"Audio is longer than 10 minutes ({duration_s}s), splitting into {math.ceil(duration_s / segment_length_s)} segments"
+                )
+                for i in range(math.ceil(duration_s / segment_length_s)):
+                    start_time = i * segment_length_s
+                    end_time = min((i + 1) * segment_length_s, audio.duration)
+                    # Extract segment
+                    output_filename = f"{output_prefix}_{str(i + 1).zfill(3)}.mp3"
+                    output_path = os.path.join(output_dir, output_filename)
+                    extract_audio(input_audio_path, output_path, start_time, end_time)
+                    output_files.append(output_path)
+            else:
+                output_files = [input_audio_path]
+            # Close audio clip after determining segments
+            if audio:
+                audio.close()
+                audio = None
+            # Transcribe audio files in parallel with concurrency limit
+            from content_core.models import ModelFactory
+            speech_to_text_model = ModelFactory.get_model("speech_to_text")
+            concurrency = get_audio_concurrency()
+            semaphore = asyncio.Semaphore(concurrency)
+            logger.debug(
+                f"Transcribing {len(output_files)} audio segments with concurrency limit of {concurrency}"
             )
-            transcriptions.append(transcription)
-        return {
-            "metadata": {"audio_files": output_files},
-            "content": " ".join(transcriptions),
-        }
+            # Create tasks for parallel transcription
+            transcription_tasks = [
+                transcribe_audio_segment(audio_file, speech_to_text_model, semaphore)
+                for audio_file in output_files
+            ]
+            # Execute all transcriptions concurrently (limited by semaphore)
+            transcriptions = await asyncio.gather(*transcription_tasks)
+            return {
+                "metadata": {"segments_count": len(output_files)},
+                "content": " ".join(transcriptions),
+            }
     except Exception as e:
         logger.error(f"Error processing audio: {str(e)}")
         logger.error(traceback.format_exc())
         raise
+    finally:
+        # Ensure audio clip is closed even if an error occurs
+        if audio:
+            try:
+                audio.close()
+            except Exception:
+                pass

content_core/processors/url.py CHANGED Viewed

@@ -147,10 +147,10 @@ async def extract_url_firecrawl(url: str):
         from firecrawl import AsyncFirecrawlApp
         app = AsyncFirecrawlApp(api_key=os.environ.get("FIRECRAWL_API_KEY"))
-        scrape_result = await app.scrape_url(url, formats=["markdown", "html"])
+        scrape_result = await app.scrape(url, formats=["markdown", "html"])
         return {
-            "title": scrape_result.metadata["title"] or scrape_result.title,
-            "content": scrape_result.markdown,
+            "title": scrape_result.metadata.title or "",
+            "content": scrape_result.markdown or "",
         }
     except Exception as e:

content_core/templated_message.py CHANGED Viewed

@@ -36,14 +36,14 @@ async def templated_message(
             prompt_template=input.system_prompt_template,
             template_text=input.system_prompt_text,
         ).render(data=input.data)
-        msgs.append(Message(role="system", content=system_prompt))
+        msgs.append({"role": "system", "content": system_prompt})
     if input.user_prompt_template or input.user_prompt_text:
         user_prompt = Prompter(
             prompt_template=input.user_prompt_template,
             template_text=input.user_prompt_text,
         ).render(data=input.data)
-        msgs.append(Message(role="user", content=user_prompt))
+        msgs.append({"role": "user", "content": user_prompt})
     result = await model.achat_complete(msgs)
     return result.content

{content_core-1.4.2.dist-info → content_core-1.6.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: content-core
-Version: 1.4.2
+Version: 1.6.0
 Summary: Extract what matters from any media source. Available as Python Library, macOS Service, CLI and MCP Server
 Author-email: LUIS NOVO <lfnovo@gmail.com>
 License-File: LICENSE
@@ -548,6 +548,9 @@ GOOGLE_API_KEY=your-key-here
 # Engine Selection (optional)
 CCORE_DOCUMENT_ENGINE=auto  # auto, simple, docling
 CCORE_URL_ENGINE=auto       # auto, simple, firecrawl, jina
+# Audio Processing (optional)
+CCORE_AUDIO_CONCURRENCY=3   # Number of concurrent audio transcriptions (1-10, default: 3)
 ```
 ### Engine Selection via Environment Variables
@@ -556,9 +559,20 @@ For deployment scenarios like MCP servers or Raycast extensions, you can overrid
 - **`CCORE_DOCUMENT_ENGINE`**: Force document engine (`auto`, `simple`, `docling`)
 - **`CCORE_URL_ENGINE`**: Force URL engine (`auto`, `simple`, `firecrawl`, `jina`)
+- **`CCORE_AUDIO_CONCURRENCY`**: Number of concurrent audio transcriptions (1-10, default: 3)
 These variables take precedence over config file settings and provide explicit control for different deployment scenarios.
+### Audio Processing Configuration
+Content Core processes long audio files by splitting them into segments and transcribing them in parallel for improved performance. You can control the concurrency level to balance speed with API rate limits:
+- **Default**: 3 concurrent transcriptions
+- **Range**: 1-10 concurrent transcriptions
+- **Configuration**: Set via `CCORE_AUDIO_CONCURRENCY` environment variable or `extraction.audio.concurrency` in `cc_config.yaml`
+Higher concurrency values can speed up processing of long audio/video files but may hit API rate limits. Lower values are more conservative and suitable for accounts with lower API quotas.
 ### Custom Prompt Templates
 Content Core allows you to define custom prompt templates for content processing. By default, the library uses built-in prompts located in the `prompts` directory. However, you can create your own prompt templates and store them in a dedicated directory. To specify the location of your custom prompts, set the `PROMPT_PATH` environment variable in your `.env` file or system environment.

{content_core-1.4.2.dist-info → content_core-1.6.0.dist-info}/RECORD RENAMED Viewed

@@ -1,11 +1,11 @@
-content_core/__init__.py,sha256=t4xFo9f3uB2FD1tdR-7ruhMW9_ciJawQReK6iFXWfR0,6531
-content_core/cc_config.yaml,sha256=hjTt5z1Z9b5LShVIqNT3OiAnTAdmr0LB5y8RTyH-fNA,1119
-content_core/config.py,sha256=3XAsMF3EhDJ6aCpzk1UZG_m3-SFdYe3cHiDPH7eVGwQ,4312
+content_core/__init__.py,sha256=c1qw2HyV8DqBnoB9fbXNIZtda60MrxGUvWG5EQFtr-8,6492
+content_core/cc_config.yaml,sha256=3Ot5u-YSBx2k3JXWnCP7s7OVBbGpGebBy_CWj3we-u4,1211
+content_core/config.py,sha256=Mao6AZZoiSiX7uZwOGgk759LlV0j6NdfYGgWgX6vhAs,7112
 content_core/logging.py,sha256=oeRdWKknEolptopxF1IvnEGEc0ZUw45QXYUEZ71GcdY,438
 content_core/models.py,sha256=Kt6tWdAX87eQ2tL6eTwcHU7_NIRnN4exP4RzV2WrMig,881
 content_core/models_config.yaml,sha256=Yr-GS94ffxnkaWojUfpErUMM7m_MShsYjR6QuDjMzwo,444
 content_core/py.typed,sha256=pLuU3XTTeVpXo4UomOjcvAIQqOrzIotlWlJ3KFo2lxQ,154
-content_core/templated_message.py,sha256=KbI2rcvgGM5oRIcsG68zAZfgNsC97fR16D61683ZSnY,1617
+content_core/templated_message.py,sha256=jsjGqD-zf__pV4P0eo9cffTK2C90-VggL64qNYejFo0,1615
 content_core/common/__init__.py,sha256=SjDp-0QRjX9PMubyTjv77_GrUqm6eC4gBuXr593JVK4,525
 content_core/common/exceptions.py,sha256=NpYedVbckIq4kP2wek7bicMVgGGn0fkhCvid5cIxfy4,1304
 content_core/common/state.py,sha256=K5jsDg4l2GSaoGyFYzdd1GW14vLaAxdxes8vUrPNVkE,1622
@@ -17,27 +17,27 @@ content_core/content/cleanup/core.py,sha256=AXUGUWxGob8si5uKRnDrreOcHV_gbGJr4YnR
 content_core/content/extraction/__init__.py,sha256=TaYw6CAcG62GZfsJxeZ6VJDLP85BU2a7_G271v6WWPk,446
 content_core/content/extraction/graph.py,sha256=AFi9B_hTuxqdgvogCOk4Xdqoboug7_KXtV0ZHlb8igM,8139
 content_core/content/identification/__init__.py,sha256=DDoCi1r-6Z_pGPPi3X1ZwyRrcRtg-rAiCTK50hnO5Y0,235
-content_core/content/identification/file_detector.py,sha256=s_10Osxv8gfVfs3UPXFzCOosvWCrf4ZCFXcW2yimUIM,17170
+content_core/content/identification/file_detector.py,sha256=GBP1cCNc1qnt5_HkE5ALQDja2f8WgqCC5h1r8xF0P7A,21480
 content_core/content/summary/__init__.py,sha256=ReKCZWKfDtqlInKeh87Y1DEfiNzVWabGybEz3hS2FrI,114
-content_core/content/summary/core.py,sha256=kEabpETljzUb-yf0NcVWTOuCtayESo74gGBVDX7YTFs,550
+content_core/content/summary/core.py,sha256=LejUbPxnRD0sbO6MupiIb-IHLxEUGU5beBZwmIiBncc,542
 content_core/mcp/__init__.py,sha256=KNZYH4F9AoW1Orw1BtO3n92Cn-127hI7iF9gnGadueU,95
 content_core/mcp/server.py,sha256=ql0uXHkIbZlHQUhUQ4CaRnj19xT6t8ErydWntFgmtUg,7021
-content_core/notebooks/run.ipynb,sha256=WPBNcQUNXR5MldNMghVcU4vE4ibrVmlANa80baQn8TA,371078
+content_core/notebooks/run.ipynb,sha256=8gbFln9WLrli_qWJB8SKQKcSNbAv25DvN5Cu4EAAeBQ,370952
 content_core/notebooks/urls.ipynb,sha256=gSmiSzmbol_Li36w8tpUsy5QgRbrnBx94Ry2zHwMvwY,7107
-content_core/processors/audio.py,sha256=Mie20g_2Akhw6BHBVo3sHMpDRYUkqBI72lEDakscx3s,5729
+content_core/processors/audio.py,sha256=CYwoTDPsVUDALHuz_EHcnjVfsKF8XjQmvmX8c-OmMNU,8462
 content_core/processors/docling.py,sha256=lf_NHh255gn4d2EymJYqyH2QiAgQDiJCY3t6Ne7R9rU,2507
 content_core/processors/office.py,sha256=DXkfmjqUhmhP6rJaO5Z5Y9sv-iK0zaPZ3waynFIPtsk,12153
 content_core/processors/pdf.py,sha256=TTDhfV2INtXumFDjLJFNMRfpbJ_tqwIcSBDzuThKxJI,10617
 content_core/processors/text.py,sha256=kKHA60-NYjLmCTYUnk8TdJxQQ0Shkg-K61Ezqaelz7k,1158
-content_core/processors/url.py,sha256=To0LTtMVNN3M83CdodQaZFuU7-IMM5w9QOHRKNV8PVI,7532
+content_core/processors/url.py,sha256=RhBOyqfSWFaf8Dhpxlo9xbsF5yuP5FhXfhbvbi4CQPc,7514
 content_core/processors/video.py,sha256=3WnZwTswvTLm8PtQhKwoqJ2BH6YZi62dMUjALwJiebo,5196
 content_core/processors/youtube.py,sha256=_qvxI9qTdxu3l1fKLuJARFt8KtZVFJ3JJBLkq1hAAXo,7868
 content_core/tools/__init__.py,sha256=DuJmd7fE-NpDvLP8IW1XY5MUkAQcdks52rn2jk4N8jQ,231
 content_core/tools/cleanup.py,sha256=5IdKedsFyRQMdYzgFSKtsfyxJldbroXQXHesHICNENI,523
 content_core/tools/extract.py,sha256=-r2_jsuMMXyXxGVqWhh1ilNPo_UMYAbw3Pkp1FzPy5g,577
 content_core/tools/summarize.py,sha256=DPfeglLWB08q8SvHrsKpOKZ35XjduUDs2J02ISwjdj0,596
-content_core-1.4.2.dist-info/METADATA,sha256=E8l57dOkDGx8_GUnk4BsaLbFKD560wKjQLlydqar1jQ,21093
-content_core-1.4.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-content_core-1.4.2.dist-info/entry_points.txt,sha256=ifbBxw37b7gAxZXoduS15KtqHuMHuU58STRkEmgM2zA,147
-content_core-1.4.2.dist-info/licenses/LICENSE,sha256=myj0z2T4qIkenCgLsRfx7Wk6UqCQNj5c7O14Qx4zpGg,1066
-content_core-1.4.2.dist-info/RECORD,,
+content_core-1.6.0.dist-info/METADATA,sha256=bBxEINm9h2ppJIia11flDRDH7UshzamVrHKHGxHrmjs,21963
+content_core-1.6.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+content_core-1.6.0.dist-info/entry_points.txt,sha256=ifbBxw37b7gAxZXoduS15KtqHuMHuU58STRkEmgM2zA,147
+content_core-1.6.0.dist-info/licenses/LICENSE,sha256=myj0z2T4qIkenCgLsRfx7Wk6UqCQNj5c7O14Qx4zpGg,1066
+content_core-1.6.0.dist-info/RECORD,,

{content_core-1.4.2.dist-info → content_core-1.6.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{content_core-1.4.2.dist-info → content_core-1.6.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{content_core-1.4.2.dist-info → content_core-1.6.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

content-core 1.4.2__py3-none-any.whl → 1.6.0__py3-none-any.whl

Potentially problematic release.

content-core 1.4.2__py3-none-any.whl → 1.6.0__py3-none-any.whl