content-core 1.6.0__py3-none-any.whl → 1.7.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of content-core might be problematic. Click here for more details.

@@ -27,6 +27,14 @@ class ProcessSourceState(BaseModel):
27
27
  default=None,
28
28
  description="Override Docling output format: 'markdown', 'html', or 'json'",
29
29
  )
30
+ audio_provider: Optional[str] = Field(
31
+ default=None,
32
+ description="Override speech-to-text provider (e.g., 'openai', 'google')",
33
+ )
34
+ audio_model: Optional[str] = Field(
35
+ default=None,
36
+ description="Override speech-to-text model name (e.g., 'whisper-1', 'chirp')",
37
+ )
30
38
 
31
39
 
32
40
  class ProcessSourceInput(BaseModel):
@@ -36,6 +44,8 @@ class ProcessSourceInput(BaseModel):
36
44
  document_engine: Optional[str] = None
37
45
  url_engine: Optional[str] = None
38
46
  output_format: Optional[str] = None
47
+ audio_provider: Optional[str] = None
48
+ audio_model: Optional[str] = None
39
49
 
40
50
 
41
51
  class ProcessSourceOutput(BaseModel):
@@ -190,8 +190,39 @@ async def extract_audio_data(data: ProcessSourceState):
190
190
 
191
191
  # Transcribe audio files in parallel with concurrency limit
192
192
  from content_core.models import ModelFactory
193
+ from esperanto import AIFactory
194
+
195
+ # Determine which model to use based on state parameters
196
+ if data.audio_provider and data.audio_model:
197
+ # Custom model provided - create new instance
198
+ try:
199
+ logger.info(
200
+ f"Using custom audio model: {data.audio_provider}/{data.audio_model}"
201
+ )
202
+ speech_to_text_model = AIFactory.create_speech_to_text(
203
+ data.audio_provider, data.audio_model
204
+ )
205
+ except Exception as e:
206
+ logger.error(
207
+ f"Failed to create custom audio model '{data.audio_provider}/{data.audio_model}': {e}. "
208
+ f"Check that the provider and model are supported by Esperanto. "
209
+ f"Falling back to default model."
210
+ )
211
+ speech_to_text_model = ModelFactory.get_model("speech_to_text")
212
+ elif data.audio_provider or data.audio_model:
213
+ # Only one parameter provided - log warning and use default
214
+ missing = "audio_model" if data.audio_provider else "audio_provider"
215
+ provided = "audio_provider" if data.audio_provider else "audio_model"
216
+ logger.warning(
217
+ f"{provided} provided without {missing}. "
218
+ f"Both audio_provider and audio_model must be specified together. "
219
+ f"Falling back to default model."
220
+ )
221
+ speech_to_text_model = ModelFactory.get_model("speech_to_text")
222
+ else:
223
+ # No custom parameters - use default (backward compatible)
224
+ speech_to_text_model = ModelFactory.get_model("speech_to_text")
193
225
 
194
- speech_to_text_model = ModelFactory.get_model("speech_to_text")
195
226
  concurrency = get_audio_concurrency()
196
227
  semaphore = asyncio.Semaphore(concurrency)
197
228
 
@@ -2,7 +2,6 @@ from typing import Dict, Optional, Union
2
2
 
3
3
  from ai_prompter import Prompter
4
4
  from esperanto import LanguageModel
5
- from esperanto.common_types import Message
6
5
  from pydantic import BaseModel, Field
7
6
 
8
7
  from content_core.models import ModelFactory
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: content-core
3
- Version: 1.6.0
3
+ Version: 1.7.0
4
4
  Summary: Extract what matters from any media source. Available as Python Library, macOS Service, CLI and MCP Server
5
5
  Author-email: LUIS NOVO <lfnovo@gmail.com>
6
6
  License-File: LICENSE
@@ -263,6 +263,14 @@ cleaned_text = await cc.clean("...messy text with [brackets] and extra spaces...
263
263
 
264
264
  # Summarize content with optional context
265
265
  summary = await cc.summarize_content("long article text", context="explain to a child")
266
+
267
+ # Extract audio with custom speech-to-text model
268
+ from content_core.common import ProcessSourceInput
269
+ result = await cc.extract(ProcessSourceInput(
270
+ file_path="interview.mp3",
271
+ audio_provider="openai",
272
+ audio_model="whisper-1"
273
+ ))
266
274
  ```
267
275
 
268
276
  ## Documentation
@@ -5,10 +5,10 @@ content_core/logging.py,sha256=oeRdWKknEolptopxF1IvnEGEc0ZUw45QXYUEZ71GcdY,438
5
5
  content_core/models.py,sha256=Kt6tWdAX87eQ2tL6eTwcHU7_NIRnN4exP4RzV2WrMig,881
6
6
  content_core/models_config.yaml,sha256=Yr-GS94ffxnkaWojUfpErUMM7m_MShsYjR6QuDjMzwo,444
7
7
  content_core/py.typed,sha256=pLuU3XTTeVpXo4UomOjcvAIQqOrzIotlWlJ3KFo2lxQ,154
8
- content_core/templated_message.py,sha256=jsjGqD-zf__pV4P0eo9cffTK2C90-VggL64qNYejFo0,1615
8
+ content_core/templated_message.py,sha256=F4ysbVUWG1V3-pT8NYbCzP5mJN_qRYtiWPa9gxjB9v0,1572
9
9
  content_core/common/__init__.py,sha256=SjDp-0QRjX9PMubyTjv77_GrUqm6eC4gBuXr593JVK4,525
10
10
  content_core/common/exceptions.py,sha256=NpYedVbckIq4kP2wek7bicMVgGGn0fkhCvid5cIxfy4,1304
11
- content_core/common/state.py,sha256=K5jsDg4l2GSaoGyFYzdd1GW14vLaAxdxes8vUrPNVkE,1622
11
+ content_core/common/state.py,sha256=Xxxtdi650x4zkNX3yXA9Jx79GAzud7Vu-I7eNEjHlhI,2010
12
12
  content_core/common/types.py,sha256=DOQFW5ySHELc_mZU6G_7PUy1kmnP4aU4IpMyyXDQcBE,177
13
13
  content_core/common/utils.py,sha256=0o4jovPEw_6wu7EcPPbDNZskbhhfLUBJBvRmp0Yc4R4,1182
14
14
  content_core/content/__init__.py,sha256=7IxfLTUHKyHjoT4MfWM2PX2J3QBeYcuERzE9vFeFiQM,230
@@ -24,7 +24,7 @@ content_core/mcp/__init__.py,sha256=KNZYH4F9AoW1Orw1BtO3n92Cn-127hI7iF9gnGadueU,
24
24
  content_core/mcp/server.py,sha256=ql0uXHkIbZlHQUhUQ4CaRnj19xT6t8ErydWntFgmtUg,7021
25
25
  content_core/notebooks/run.ipynb,sha256=8gbFln9WLrli_qWJB8SKQKcSNbAv25DvN5Cu4EAAeBQ,370952
26
26
  content_core/notebooks/urls.ipynb,sha256=gSmiSzmbol_Li36w8tpUsy5QgRbrnBx94Ry2zHwMvwY,7107
27
- content_core/processors/audio.py,sha256=CYwoTDPsVUDALHuz_EHcnjVfsKF8XjQmvmX8c-OmMNU,8462
27
+ content_core/processors/audio.py,sha256=h4aPff8WjDklE2iCviuAEEAYJTTxmWh9nOgMYJHWzmM,10202
28
28
  content_core/processors/docling.py,sha256=lf_NHh255gn4d2EymJYqyH2QiAgQDiJCY3t6Ne7R9rU,2507
29
29
  content_core/processors/office.py,sha256=DXkfmjqUhmhP6rJaO5Z5Y9sv-iK0zaPZ3waynFIPtsk,12153
30
30
  content_core/processors/pdf.py,sha256=TTDhfV2INtXumFDjLJFNMRfpbJ_tqwIcSBDzuThKxJI,10617
@@ -36,8 +36,8 @@ content_core/tools/__init__.py,sha256=DuJmd7fE-NpDvLP8IW1XY5MUkAQcdks52rn2jk4N8j
36
36
  content_core/tools/cleanup.py,sha256=5IdKedsFyRQMdYzgFSKtsfyxJldbroXQXHesHICNENI,523
37
37
  content_core/tools/extract.py,sha256=-r2_jsuMMXyXxGVqWhh1ilNPo_UMYAbw3Pkp1FzPy5g,577
38
38
  content_core/tools/summarize.py,sha256=DPfeglLWB08q8SvHrsKpOKZ35XjduUDs2J02ISwjdj0,596
39
- content_core-1.6.0.dist-info/METADATA,sha256=bBxEINm9h2ppJIia11flDRDH7UshzamVrHKHGxHrmjs,21963
40
- content_core-1.6.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
41
- content_core-1.6.0.dist-info/entry_points.txt,sha256=ifbBxw37b7gAxZXoduS15KtqHuMHuU58STRkEmgM2zA,147
42
- content_core-1.6.0.dist-info/licenses/LICENSE,sha256=myj0z2T4qIkenCgLsRfx7Wk6UqCQNj5c7O14Qx4zpGg,1066
43
- content_core-1.6.0.dist-info/RECORD,,
39
+ content_core-1.7.0.dist-info/METADATA,sha256=l3oDAdfN_gMFfOgHz3fELrjSxUXq8AKRKbC5uVF6mzM,22201
40
+ content_core-1.7.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
41
+ content_core-1.7.0.dist-info/entry_points.txt,sha256=ifbBxw37b7gAxZXoduS15KtqHuMHuU58STRkEmgM2zA,147
42
+ content_core-1.7.0.dist-info/licenses/LICENSE,sha256=myj0z2T4qIkenCgLsRfx7Wk6UqCQNj5c7O14Qx4zpGg,1066
43
+ content_core-1.7.0.dist-info/RECORD,,