PyPI - content-core - Versions diffs - 1.6.0__py3-none-any.whl → 1.7.0__py3-none-any.whl - Mend

content-core 1.6.0__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of content-core might be problematic. Click here for more details.

Files changed (8)

content_core/common/state.py CHANGED Viewed

@@ -27,6 +27,14 @@ class ProcessSourceState(BaseModel):
         default=None,
         description="Override Docling output format: 'markdown', 'html', or 'json'",
     )
+    audio_provider: Optional[str] = Field(
+        default=None,
+        description="Override speech-to-text provider (e.g., 'openai', 'google')",
+    )
+    audio_model: Optional[str] = Field(
+        default=None,
+        description="Override speech-to-text model name (e.g., 'whisper-1', 'chirp')",
+    )
 class ProcessSourceInput(BaseModel):
@@ -36,6 +44,8 @@ class ProcessSourceInput(BaseModel):
     document_engine: Optional[str] = None
     url_engine: Optional[str] = None
     output_format: Optional[str] = None
+    audio_provider: Optional[str] = None
+    audio_model: Optional[str] = None
 class ProcessSourceOutput(BaseModel):

content_core/processors/audio.py CHANGED Viewed

@@ -190,8 +190,39 @@ async def extract_audio_data(data: ProcessSourceState):
             # Transcribe audio files in parallel with concurrency limit
             from content_core.models import ModelFactory
+            from esperanto import AIFactory
+            # Determine which model to use based on state parameters
+            if data.audio_provider and data.audio_model:
+                # Custom model provided - create new instance
+                try:
+                    logger.info(
+                        f"Using custom audio model: {data.audio_provider}/{data.audio_model}"
+                    )
+                    speech_to_text_model = AIFactory.create_speech_to_text(
+                        data.audio_provider, data.audio_model
+                    )
+                except Exception as e:
+                    logger.error(
+                        f"Failed to create custom audio model '{data.audio_provider}/{data.audio_model}': {e}. "
+                        f"Check that the provider and model are supported by Esperanto. "
+                        f"Falling back to default model."
+                    )
+                    speech_to_text_model = ModelFactory.get_model("speech_to_text")
+            elif data.audio_provider or data.audio_model:
+                # Only one parameter provided - log warning and use default
+                missing = "audio_model" if data.audio_provider else "audio_provider"
+                provided = "audio_provider" if data.audio_provider else "audio_model"
+                logger.warning(
+                    f"{provided} provided without {missing}. "
+                    f"Both audio_provider and audio_model must be specified together. "
+                    f"Falling back to default model."
+                )
+                speech_to_text_model = ModelFactory.get_model("speech_to_text")
+            else:
+                # No custom parameters - use default (backward compatible)
+                speech_to_text_model = ModelFactory.get_model("speech_to_text")
-            speech_to_text_model = ModelFactory.get_model("speech_to_text")
             concurrency = get_audio_concurrency()
             semaphore = asyncio.Semaphore(concurrency)

content_core/templated_message.py CHANGED Viewed

@@ -2,7 +2,6 @@ from typing import Dict, Optional, Union
 from ai_prompter import Prompter
 from esperanto import LanguageModel
-from esperanto.common_types import Message
 from pydantic import BaseModel, Field
 from content_core.models import ModelFactory

{content_core-1.6.0.dist-info → content_core-1.7.0.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: content-core
-Version: 1.6.0
+Version: 1.7.0
 Summary: Extract what matters from any media source. Available as Python Library, macOS Service, CLI and MCP Server
 Author-email: LUIS NOVO <lfnovo@gmail.com>
 License-File: LICENSE
@@ -263,6 +263,14 @@ cleaned_text = await cc.clean("...messy text with [brackets] and extra spaces...
 # Summarize content with optional context
 summary = await cc.summarize_content("long article text", context="explain to a child")
+# Extract audio with custom speech-to-text model
+from content_core.common import ProcessSourceInput
+result = await cc.extract(ProcessSourceInput(
+    file_path="interview.mp3",
+    audio_provider="openai",
+    audio_model="whisper-1"
+))
 ```
 ## Documentation

{content_core-1.6.0.dist-info → content_core-1.7.0.dist-info}/RECORD RENAMED Viewed

@@ -5,10 +5,10 @@ content_core/logging.py,sha256=oeRdWKknEolptopxF1IvnEGEc0ZUw45QXYUEZ71GcdY,438
 content_core/models.py,sha256=Kt6tWdAX87eQ2tL6eTwcHU7_NIRnN4exP4RzV2WrMig,881
 content_core/models_config.yaml,sha256=Yr-GS94ffxnkaWojUfpErUMM7m_MShsYjR6QuDjMzwo,444
 content_core/py.typed,sha256=pLuU3XTTeVpXo4UomOjcvAIQqOrzIotlWlJ3KFo2lxQ,154
-content_core/templated_message.py,sha256=jsjGqD-zf__pV4P0eo9cffTK2C90-VggL64qNYejFo0,1615
+content_core/templated_message.py,sha256=F4ysbVUWG1V3-pT8NYbCzP5mJN_qRYtiWPa9gxjB9v0,1572
 content_core/common/__init__.py,sha256=SjDp-0QRjX9PMubyTjv77_GrUqm6eC4gBuXr593JVK4,525
 content_core/common/exceptions.py,sha256=NpYedVbckIq4kP2wek7bicMVgGGn0fkhCvid5cIxfy4,1304
-content_core/common/state.py,sha256=K5jsDg4l2GSaoGyFYzdd1GW14vLaAxdxes8vUrPNVkE,1622
+content_core/common/state.py,sha256=Xxxtdi650x4zkNX3yXA9Jx79GAzud7Vu-I7eNEjHlhI,2010
 content_core/common/types.py,sha256=DOQFW5ySHELc_mZU6G_7PUy1kmnP4aU4IpMyyXDQcBE,177
 content_core/common/utils.py,sha256=0o4jovPEw_6wu7EcPPbDNZskbhhfLUBJBvRmp0Yc4R4,1182
 content_core/content/__init__.py,sha256=7IxfLTUHKyHjoT4MfWM2PX2J3QBeYcuERzE9vFeFiQM,230
@@ -24,7 +24,7 @@ content_core/mcp/__init__.py,sha256=KNZYH4F9AoW1Orw1BtO3n92Cn-127hI7iF9gnGadueU,
 content_core/mcp/server.py,sha256=ql0uXHkIbZlHQUhUQ4CaRnj19xT6t8ErydWntFgmtUg,7021
 content_core/notebooks/run.ipynb,sha256=8gbFln9WLrli_qWJB8SKQKcSNbAv25DvN5Cu4EAAeBQ,370952
 content_core/notebooks/urls.ipynb,sha256=gSmiSzmbol_Li36w8tpUsy5QgRbrnBx94Ry2zHwMvwY,7107
-content_core/processors/audio.py,sha256=CYwoTDPsVUDALHuz_EHcnjVfsKF8XjQmvmX8c-OmMNU,8462
+content_core/processors/audio.py,sha256=h4aPff8WjDklE2iCviuAEEAYJTTxmWh9nOgMYJHWzmM,10202
 content_core/processors/docling.py,sha256=lf_NHh255gn4d2EymJYqyH2QiAgQDiJCY3t6Ne7R9rU,2507
 content_core/processors/office.py,sha256=DXkfmjqUhmhP6rJaO5Z5Y9sv-iK0zaPZ3waynFIPtsk,12153
 content_core/processors/pdf.py,sha256=TTDhfV2INtXumFDjLJFNMRfpbJ_tqwIcSBDzuThKxJI,10617
@@ -36,8 +36,8 @@ content_core/tools/__init__.py,sha256=DuJmd7fE-NpDvLP8IW1XY5MUkAQcdks52rn2jk4N8j
 content_core/tools/cleanup.py,sha256=5IdKedsFyRQMdYzgFSKtsfyxJldbroXQXHesHICNENI,523
 content_core/tools/extract.py,sha256=-r2_jsuMMXyXxGVqWhh1ilNPo_UMYAbw3Pkp1FzPy5g,577
 content_core/tools/summarize.py,sha256=DPfeglLWB08q8SvHrsKpOKZ35XjduUDs2J02ISwjdj0,596
-content_core-1.6.0.dist-info/METADATA,sha256=bBxEINm9h2ppJIia11flDRDH7UshzamVrHKHGxHrmjs,21963
-content_core-1.6.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-content_core-1.6.0.dist-info/entry_points.txt,sha256=ifbBxw37b7gAxZXoduS15KtqHuMHuU58STRkEmgM2zA,147
-content_core-1.6.0.dist-info/licenses/LICENSE,sha256=myj0z2T4qIkenCgLsRfx7Wk6UqCQNj5c7O14Qx4zpGg,1066
-content_core-1.6.0.dist-info/RECORD,,
+content_core-1.7.0.dist-info/METADATA,sha256=l3oDAdfN_gMFfOgHz3fELrjSxUXq8AKRKbC5uVF6mzM,22201
+content_core-1.7.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+content_core-1.7.0.dist-info/entry_points.txt,sha256=ifbBxw37b7gAxZXoduS15KtqHuMHuU58STRkEmgM2zA,147
+content_core-1.7.0.dist-info/licenses/LICENSE,sha256=myj0z2T4qIkenCgLsRfx7Wk6UqCQNj5c7O14Qx4zpGg,1066
+content_core-1.7.0.dist-info/RECORD,,

{content_core-1.6.0.dist-info → content_core-1.7.0.dist-info}/WHEEL RENAMED Viewed

File without changes

{content_core-1.6.0.dist-info → content_core-1.7.0.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{content_core-1.6.0.dist-info → content_core-1.7.0.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

content-core 1.6.0__py3-none-any.whl → 1.7.0__py3-none-any.whl

Potentially problematic release.

content-core 1.6.0__py3-none-any.whl → 1.7.0__py3-none-any.whl