content-core 0.7.0__tar.gz → 0.7.2__tar.gz
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Potentially problematic release.
This version of content-core might be problematic. Click here for more details.
- {content_core-0.7.0 → content_core-0.7.2}/PKG-INFO +1 -1
- {content_core-0.7.0 → content_core-0.7.2}/pyproject.toml +1 -1
- {content_core-0.7.0 → content_core-0.7.2}/src/content_core/common/state.py +9 -2
- {content_core-0.7.0 → content_core-0.7.2}/uv.lock +1006 -847
- {content_core-0.7.0 → content_core-0.7.2}/.github/PULL_REQUEST_TEMPLATE.md +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/.github/workflows/publish.yml +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/.gitignore +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/.python-version +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/CONTRIBUTING.md +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/LICENSE +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/Makefile +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/README.md +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/docs/processors.md +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/docs/usage.md +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/prompts/content/cleanup.jinja +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/prompts/content/summarize.jinja +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/src/content_core/__init__.py +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/src/content_core/cc_config.yaml +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/src/content_core/common/__init__.py +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/src/content_core/common/exceptions.py +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/src/content_core/common/utils.py +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/src/content_core/config.py +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/src/content_core/content/__init__.py +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/src/content_core/content/cleanup/__init__.py +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/src/content_core/content/cleanup/core.py +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/src/content_core/content/extraction/__init__.py +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/src/content_core/content/extraction/graph.py +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/src/content_core/content/summary/__init__.py +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/src/content_core/content/summary/core.py +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/src/content_core/logging.py +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/src/content_core/models.py +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/src/content_core/models_config.yaml +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/src/content_core/notebooks/run.ipynb +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/src/content_core/processors/audio.py +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/src/content_core/processors/docling.py +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/src/content_core/processors/office.py +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/src/content_core/processors/pdf.py +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/src/content_core/processors/text.py +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/src/content_core/processors/url.py +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/src/content_core/processors/video.py +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/src/content_core/processors/youtube.py +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/src/content_core/py.typed +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/src/content_core/templated_message.py +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/src/content_core/tools/__init__.py +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/src/content_core/tools/cleanup.py +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/src/content_core/tools/extract.py +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/src/content_core/tools/summarize.py +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/tests/input_content/file.docx +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/tests/input_content/file.epub +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/tests/input_content/file.md +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/tests/input_content/file.mp3 +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/tests/input_content/file.mp4 +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/tests/input_content/file.pdf +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/tests/input_content/file.pptx +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/tests/input_content/file.txt +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/tests/input_content/file.xlsx +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/tests/input_content/file_audio.mp3 +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/tests/integration/test_extraction.py +0 -0
- {content_core-0.7.0 → content_core-0.7.2}/tests/unit/test_docling.py +0 -0
|
@@ -13,8 +13,13 @@ class ProcessSourceState(BaseModel):
|
|
|
13
13
|
identified_provider: Optional[str] = ""
|
|
14
14
|
metadata: Optional[dict] = Field(default_factory=lambda: {})
|
|
15
15
|
content: Optional[str] = ""
|
|
16
|
-
engine: Optional[str] = Field(
|
|
17
|
-
|
|
16
|
+
engine: Optional[str] = Field(
|
|
17
|
+
default=None, description="Override extraction engine: 'legacy' or 'docling'"
|
|
18
|
+
)
|
|
19
|
+
output_format: Optional[str] = Field(
|
|
20
|
+
default=None,
|
|
21
|
+
description="Override Docling output format: 'markdown', 'html', or 'json'",
|
|
22
|
+
)
|
|
18
23
|
|
|
19
24
|
|
|
20
25
|
class ProcessSourceInput(BaseModel):
|
|
@@ -27,6 +32,8 @@ class ProcessSourceInput(BaseModel):
|
|
|
27
32
|
|
|
28
33
|
class ProcessSourceOutput(BaseModel):
|
|
29
34
|
title: Optional[str] = ""
|
|
35
|
+
file_path: Optional[str] = ""
|
|
36
|
+
url: Optional[str] = ""
|
|
30
37
|
source_type: Optional[str] = ""
|
|
31
38
|
identified_type: Optional[str] = ""
|
|
32
39
|
identified_provider: Optional[str] = ""
|