docling-jobkit 1.9.1__tar.gz → 1.10.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/PKG-INFO +2 -2
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/convert/manager.py +1 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/datamodel/convert.py +11 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/orchestrators/rq/orchestrator.py +3 -1
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/orchestrators/rq/worker.py +10 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/pyproject.toml +2 -2
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/.gitignore +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/LICENSE +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/README.md +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/__init__.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/cli/__init__.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/cli/local.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/cli/multiproc.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/connectors/__init__.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/connectors/google_drive_helper.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/connectors/google_drive_source_processor.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/connectors/google_drive_target_processor.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/connectors/http_source_processor.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/connectors/local_path_source_processor.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/connectors/local_path_target_processor.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/connectors/s3_helper.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/connectors/s3_source_processor.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/connectors/s3_target_processor.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/connectors/source_processor.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/connectors/source_processor_factory.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/connectors/target_processor.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/connectors/target_processor_factory.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/convert/__init__.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/convert/chunking.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/convert/results.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/convert/results_processor.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/datamodel/__init__.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/datamodel/callback.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/datamodel/chunking.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/datamodel/google_drive_coords.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/datamodel/http_inputs.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/datamodel/result.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/datamodel/s3_coords.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/datamodel/task.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/datamodel/task_meta.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/datamodel/task_sources.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/datamodel/task_targets.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/kfp_pipeline/__init__.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/kfp_pipeline/docling-s3in-s3out.yaml +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/kfp_pipeline/docling_s3in_s3out.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/kfp_pipeline/docling_s3in_s3out_with_infer.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/kfp_pipeline/docling_s3in_s3out_with_infer.yaml +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/orchestrators/__init__.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/orchestrators/base_notifier.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/orchestrators/base_orchestrator.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/orchestrators/kfp/__init__.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/orchestrators/kfp/kfp_pipeline.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/orchestrators/kfp/notify.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/orchestrators/kfp/orchestrator.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/orchestrators/local/__init__.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/orchestrators/local/orchestrator.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/orchestrators/local/worker.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/orchestrators/rq/__init__.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/py.typed +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/ray_job/__init__.py +0 -0
- {docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/ray_job/main.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: docling-jobkit
|
|
3
|
-
Version: 1.9.1
|
|
3
|
+
Version: 1.10.0
|
|
4
4
|
Summary: Running a distributed job processing documents with Docling.
|
|
5
5
|
Project-URL: Homepage, https://github.com/docling-project/docling-jobkit
|
|
6
6
|
Project-URL: Documentation, https://docling-project.github.io/docling/usage/jobkit/
|
|
@@ -27,7 +27,7 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
|
27
27
|
Classifier: Typing :: Typed
|
|
28
28
|
Requires-Python: >=3.10
|
|
29
29
|
Requires-Dist: boto3~=1.35
|
|
30
|
-
Requires-Dist: docling~=2.
|
|
30
|
+
Requires-Dist: docling~=2.72
|
|
31
31
|
Requires-Dist: httpx<1,>=0.28
|
|
32
32
|
Requires-Dist: pandas~=2.2
|
|
33
33
|
Requires-Dist: pydantic-settings~=2.4
|
|
@@ -208,6 +208,7 @@ class DoclingConverterManager:
|
|
|
208
208
|
do_code_enrichment=request.do_code_enrichment,
|
|
209
209
|
do_formula_enrichment=request.do_formula_enrichment,
|
|
210
210
|
do_picture_classification=request.do_picture_classification,
|
|
211
|
+
do_chart_extraction=request.do_chart_extraction,
|
|
211
212
|
do_picture_description=request.do_picture_description,
|
|
212
213
|
)
|
|
213
214
|
pipeline_options.table_structure_options = TableStructureOptions(
|
|
@@ -454,6 +454,17 @@ class ConvertDocumentsOptions(BaseModel):
|
|
|
454
454
|
),
|
|
455
455
|
] = False
|
|
456
456
|
|
|
457
|
+
do_chart_extraction: Annotated[
|
|
458
|
+
bool,
|
|
459
|
+
Field(
|
|
460
|
+
description=(
|
|
461
|
+
"If enabled, extract numberic data from charts. "
|
|
462
|
+
"Boolean. Optional, defaults to false."
|
|
463
|
+
),
|
|
464
|
+
examples=[False],
|
|
465
|
+
),
|
|
466
|
+
] = False
|
|
467
|
+
|
|
457
468
|
do_picture_description: Annotated[
|
|
458
469
|
bool,
|
|
459
470
|
Field(
|
|
@@ -187,6 +187,16 @@ def docling_task(
|
|
|
187
187
|
return result_key
|
|
188
188
|
|
|
189
189
|
|
|
190
|
+
def clear_cache_task(conversion_manager: DoclingConverterManager, **_):
|
|
191
|
+
"""RQ job that clears the converter cache on the worker."""
|
|
192
|
+
_log.info("Clearing converter cache on worker")
|
|
193
|
+
conversion_manager.clear_cache()
|
|
194
|
+
import gc
|
|
195
|
+
|
|
196
|
+
gc.collect()
|
|
197
|
+
_log.info("Converter cache cleared")
|
|
198
|
+
|
|
199
|
+
|
|
190
200
|
def run_worker(
|
|
191
201
|
rq_config: Optional[RQOrchestratorConfig] = None,
|
|
192
202
|
cm_config: Optional[DoclingConverterManagerConfig] = None,
|
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "docling-jobkit"
|
|
7
|
-
version = "1.9.1" # DO NOT EDIT, updated automatically
|
|
7
|
+
version = "1.10.0" # DO NOT EDIT, updated automatically
|
|
8
8
|
description = "Running a distributed job processing documents with Docling."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = "MIT"
|
|
@@ -42,7 +42,7 @@ classifiers = [
|
|
|
42
42
|
]
|
|
43
43
|
requires-python = ">=3.10"
|
|
44
44
|
dependencies = [
|
|
45
|
-
"docling~=2.
|
|
45
|
+
"docling~=2.72",
|
|
46
46
|
"pydantic~=2.10",
|
|
47
47
|
"pydantic-settings~=2.4",
|
|
48
48
|
"boto3~=1.35",
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/connectors/google_drive_helper.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/connectors/http_source_processor.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/connectors/s3_source_processor.py
RENAMED
|
File without changes
|
{docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/connectors/s3_target_processor.py
RENAMED
|
File without changes
|
{docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/connectors/source_processor.py
RENAMED
|
File without changes
|
{docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/connectors/source_processor_factory.py
RENAMED
|
File without changes
|
{docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/connectors/target_processor.py
RENAMED
|
File without changes
|
{docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/connectors/target_processor_factory.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/datamodel/google_drive_coords.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/kfp_pipeline/docling-s3in-s3out.yaml
RENAMED
|
File without changes
|
{docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/kfp_pipeline/docling_s3in_s3out.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/orchestrators/base_notifier.py
RENAMED
|
File without changes
|
{docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/orchestrators/base_orchestrator.py
RENAMED
|
File without changes
|
|
File without changes
|
{docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/orchestrators/kfp/kfp_pipeline.py
RENAMED
|
File without changes
|
|
File without changes
|
{docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/orchestrators/kfp/orchestrator.py
RENAMED
|
File without changes
|
{docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/orchestrators/local/__init__.py
RENAMED
|
File without changes
|
{docling_jobkit-1.9.1 → docling_jobkit-1.10.0}/docling_jobkit/orchestrators/local/orchestrator.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|