content-core 1.6.0__py3-none-any.whl → 1.7.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of content-core might be problematic. Click here for more details.
- content_core/common/state.py +10 -0
- content_core/processors/audio.py +32 -1
- content_core/templated_message.py +0 -1
- {content_core-1.6.0.dist-info → content_core-1.7.0.dist-info}/METADATA +9 -1
- {content_core-1.6.0.dist-info → content_core-1.7.0.dist-info}/RECORD +8 -8
- {content_core-1.6.0.dist-info → content_core-1.7.0.dist-info}/WHEEL +0 -0
- {content_core-1.6.0.dist-info → content_core-1.7.0.dist-info}/entry_points.txt +0 -0
- {content_core-1.6.0.dist-info → content_core-1.7.0.dist-info}/licenses/LICENSE +0 -0
content_core/common/state.py
CHANGED
|
@@ -27,6 +27,14 @@ class ProcessSourceState(BaseModel):
|
|
|
27
27
|
default=None,
|
|
28
28
|
description="Override Docling output format: 'markdown', 'html', or 'json'",
|
|
29
29
|
)
|
|
30
|
+
audio_provider: Optional[str] = Field(
|
|
31
|
+
default=None,
|
|
32
|
+
description="Override speech-to-text provider (e.g., 'openai', 'google')",
|
|
33
|
+
)
|
|
34
|
+
audio_model: Optional[str] = Field(
|
|
35
|
+
default=None,
|
|
36
|
+
description="Override speech-to-text model name (e.g., 'whisper-1', 'chirp')",
|
|
37
|
+
)
|
|
30
38
|
|
|
31
39
|
|
|
32
40
|
class ProcessSourceInput(BaseModel):
|
|
@@ -36,6 +44,8 @@ class ProcessSourceInput(BaseModel):
|
|
|
36
44
|
document_engine: Optional[str] = None
|
|
37
45
|
url_engine: Optional[str] = None
|
|
38
46
|
output_format: Optional[str] = None
|
|
47
|
+
audio_provider: Optional[str] = None
|
|
48
|
+
audio_model: Optional[str] = None
|
|
39
49
|
|
|
40
50
|
|
|
41
51
|
class ProcessSourceOutput(BaseModel):
|
content_core/processors/audio.py
CHANGED
|
@@ -190,8 +190,39 @@ async def extract_audio_data(data: ProcessSourceState):
|
|
|
190
190
|
|
|
191
191
|
# Transcribe audio files in parallel with concurrency limit
|
|
192
192
|
from content_core.models import ModelFactory
|
|
193
|
+
from esperanto import AIFactory
|
|
194
|
+
|
|
195
|
+
# Determine which model to use based on state parameters
|
|
196
|
+
if data.audio_provider and data.audio_model:
|
|
197
|
+
# Custom model provided - create new instance
|
|
198
|
+
try:
|
|
199
|
+
logger.info(
|
|
200
|
+
f"Using custom audio model: {data.audio_provider}/{data.audio_model}"
|
|
201
|
+
)
|
|
202
|
+
speech_to_text_model = AIFactory.create_speech_to_text(
|
|
203
|
+
data.audio_provider, data.audio_model
|
|
204
|
+
)
|
|
205
|
+
except Exception as e:
|
|
206
|
+
logger.error(
|
|
207
|
+
f"Failed to create custom audio model '{data.audio_provider}/{data.audio_model}': {e}. "
|
|
208
|
+
f"Check that the provider and model are supported by Esperanto. "
|
|
209
|
+
f"Falling back to default model."
|
|
210
|
+
)
|
|
211
|
+
speech_to_text_model = ModelFactory.get_model("speech_to_text")
|
|
212
|
+
elif data.audio_provider or data.audio_model:
|
|
213
|
+
# Only one parameter provided - log warning and use default
|
|
214
|
+
missing = "audio_model" if data.audio_provider else "audio_provider"
|
|
215
|
+
provided = "audio_provider" if data.audio_provider else "audio_model"
|
|
216
|
+
logger.warning(
|
|
217
|
+
f"{provided} provided without {missing}. "
|
|
218
|
+
f"Both audio_provider and audio_model must be specified together. "
|
|
219
|
+
f"Falling back to default model."
|
|
220
|
+
)
|
|
221
|
+
speech_to_text_model = ModelFactory.get_model("speech_to_text")
|
|
222
|
+
else:
|
|
223
|
+
# No custom parameters - use default (backward compatible)
|
|
224
|
+
speech_to_text_model = ModelFactory.get_model("speech_to_text")
|
|
193
225
|
|
|
194
|
-
speech_to_text_model = ModelFactory.get_model("speech_to_text")
|
|
195
226
|
concurrency = get_audio_concurrency()
|
|
196
227
|
semaphore = asyncio.Semaphore(concurrency)
|
|
197
228
|
|
|
{content_core-1.6.0.dist-info → content_core-1.7.0.dist-info}/METADATA
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: content-core
|
|
3
|
-
Version: 1.6.0
|
|
3
|
+
Version: 1.7.0
|
|
4
4
|
Summary: Extract what matters from any media source. Available as Python Library, macOS Service, CLI and MCP Server
|
|
5
5
|
Author-email: LUIS NOVO <lfnovo@gmail.com>
|
|
6
6
|
License-File: LICENSE
|
|
@@ -263,6 +263,14 @@ cleaned_text = await cc.clean("...messy text with [brackets] and extra spaces...
|
|
|
263
263
|
|
|
264
264
|
# Summarize content with optional context
|
|
265
265
|
summary = await cc.summarize_content("long article text", context="explain to a child")
|
|
266
|
+
|
|
267
|
+
# Extract audio with custom speech-to-text model
|
|
268
|
+
from content_core.common import ProcessSourceInput
|
|
269
|
+
result = await cc.extract(ProcessSourceInput(
|
|
270
|
+
file_path="interview.mp3",
|
|
271
|
+
audio_provider="openai",
|
|
272
|
+
audio_model="whisper-1"
|
|
273
|
+
))
|
|
266
274
|
```
|
|
267
275
|
|
|
268
276
|
## Documentation
|
|
{content_core-1.6.0.dist-info → content_core-1.7.0.dist-info}/RECORD
CHANGED
|
@@ -5,10 +5,10 @@ content_core/logging.py,sha256=oeRdWKknEolptopxF1IvnEGEc0ZUw45QXYUEZ71GcdY,438
|
|
|
5
5
|
content_core/models.py,sha256=Kt6tWdAX87eQ2tL6eTwcHU7_NIRnN4exP4RzV2WrMig,881
|
|
6
6
|
content_core/models_config.yaml,sha256=Yr-GS94ffxnkaWojUfpErUMM7m_MShsYjR6QuDjMzwo,444
|
|
7
7
|
content_core/py.typed,sha256=pLuU3XTTeVpXo4UomOjcvAIQqOrzIotlWlJ3KFo2lxQ,154
|
|
8
|
-
content_core/templated_message.py,sha256=
|
|
8
|
+
content_core/templated_message.py,sha256=F4ysbVUWG1V3-pT8NYbCzP5mJN_qRYtiWPa9gxjB9v0,1572
|
|
9
9
|
content_core/common/__init__.py,sha256=SjDp-0QRjX9PMubyTjv77_GrUqm6eC4gBuXr593JVK4,525
|
|
10
10
|
content_core/common/exceptions.py,sha256=NpYedVbckIq4kP2wek7bicMVgGGn0fkhCvid5cIxfy4,1304
|
|
11
|
-
content_core/common/state.py,sha256=
|
|
11
|
+
content_core/common/state.py,sha256=Xxxtdi650x4zkNX3yXA9Jx79GAzud7Vu-I7eNEjHlhI,2010
|
|
12
12
|
content_core/common/types.py,sha256=DOQFW5ySHELc_mZU6G_7PUy1kmnP4aU4IpMyyXDQcBE,177
|
|
13
13
|
content_core/common/utils.py,sha256=0o4jovPEw_6wu7EcPPbDNZskbhhfLUBJBvRmp0Yc4R4,1182
|
|
14
14
|
content_core/content/__init__.py,sha256=7IxfLTUHKyHjoT4MfWM2PX2J3QBeYcuERzE9vFeFiQM,230
|
|
@@ -24,7 +24,7 @@ content_core/mcp/__init__.py,sha256=KNZYH4F9AoW1Orw1BtO3n92Cn-127hI7iF9gnGadueU,
|
|
|
24
24
|
content_core/mcp/server.py,sha256=ql0uXHkIbZlHQUhUQ4CaRnj19xT6t8ErydWntFgmtUg,7021
|
|
25
25
|
content_core/notebooks/run.ipynb,sha256=8gbFln9WLrli_qWJB8SKQKcSNbAv25DvN5Cu4EAAeBQ,370952
|
|
26
26
|
content_core/notebooks/urls.ipynb,sha256=gSmiSzmbol_Li36w8tpUsy5QgRbrnBx94Ry2zHwMvwY,7107
|
|
27
|
-
content_core/processors/audio.py,sha256=
|
|
27
|
+
content_core/processors/audio.py,sha256=h4aPff8WjDklE2iCviuAEEAYJTTxmWh9nOgMYJHWzmM,10202
|
|
28
28
|
content_core/processors/docling.py,sha256=lf_NHh255gn4d2EymJYqyH2QiAgQDiJCY3t6Ne7R9rU,2507
|
|
29
29
|
content_core/processors/office.py,sha256=DXkfmjqUhmhP6rJaO5Z5Y9sv-iK0zaPZ3waynFIPtsk,12153
|
|
30
30
|
content_core/processors/pdf.py,sha256=TTDhfV2INtXumFDjLJFNMRfpbJ_tqwIcSBDzuThKxJI,10617
|
|
@@ -36,8 +36,8 @@ content_core/tools/__init__.py,sha256=DuJmd7fE-NpDvLP8IW1XY5MUkAQcdks52rn2jk4N8j
|
|
|
36
36
|
content_core/tools/cleanup.py,sha256=5IdKedsFyRQMdYzgFSKtsfyxJldbroXQXHesHICNENI,523
|
|
37
37
|
content_core/tools/extract.py,sha256=-r2_jsuMMXyXxGVqWhh1ilNPo_UMYAbw3Pkp1FzPy5g,577
|
|
38
38
|
content_core/tools/summarize.py,sha256=DPfeglLWB08q8SvHrsKpOKZ35XjduUDs2J02ISwjdj0,596
|
|
39
|
-
content_core-1.
|
|
40
|
-
content_core-1.
|
|
41
|
-
content_core-1.
|
|
42
|
-
content_core-1.
|
|
43
|
-
content_core-1.
|
|
39
|
+
content_core-1.7.0.dist-info/METADATA,sha256=l3oDAdfN_gMFfOgHz3fELrjSxUXq8AKRKbC5uVF6mzM,22201
|
|
40
|
+
content_core-1.7.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
41
|
+
content_core-1.7.0.dist-info/entry_points.txt,sha256=ifbBxw37b7gAxZXoduS15KtqHuMHuU58STRkEmgM2zA,147
|
|
42
|
+
content_core-1.7.0.dist-info/licenses/LICENSE,sha256=myj0z2T4qIkenCgLsRfx7Wk6UqCQNj5c7O14Qx4zpGg,1066
|
|
43
|
+
content_core-1.7.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|