docling-jobkit 1.8.0__tar.gz → 1.8.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/PKG-INFO +1 -1
  2. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/convert/manager.py +56 -4
  3. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/pyproject.toml +1 -1
  4. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/.gitignore +0 -0
  5. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/LICENSE +0 -0
  6. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/README.md +0 -0
  7. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/__init__.py +0 -0
  8. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/cli/__init__.py +0 -0
  9. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/cli/local.py +0 -0
  10. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/connectors/__init__.py +0 -0
  11. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/connectors/google_drive_helper.py +0 -0
  12. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/connectors/google_drive_source_processor.py +0 -0
  13. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/connectors/google_drive_target_processor.py +0 -0
  14. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/connectors/http_source_processor.py +0 -0
  15. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/connectors/s3_helper.py +0 -0
  16. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/connectors/s3_source_processor.py +0 -0
  17. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/connectors/s3_target_processor.py +0 -0
  18. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/connectors/source_processor.py +0 -0
  19. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/connectors/source_processor_factory.py +0 -0
  20. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/connectors/target_processor.py +0 -0
  21. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/connectors/target_processor_factory.py +0 -0
  22. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/convert/__init__.py +0 -0
  23. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/convert/chunking.py +0 -0
  24. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/convert/results.py +0 -0
  25. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/convert/results_processor.py +0 -0
  26. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/datamodel/__init__.py +0 -0
  27. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/datamodel/callback.py +0 -0
  28. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/datamodel/chunking.py +0 -0
  29. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/datamodel/convert.py +0 -0
  30. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/datamodel/google_drive_coords.py +0 -0
  31. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/datamodel/http_inputs.py +0 -0
  32. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/datamodel/result.py +0 -0
  33. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/datamodel/s3_coords.py +0 -0
  34. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/datamodel/task.py +0 -0
  35. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/datamodel/task_meta.py +0 -0
  36. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/datamodel/task_sources.py +0 -0
  37. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/datamodel/task_targets.py +0 -0
  38. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/kfp_pipeline/__init__.py +0 -0
  39. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/kfp_pipeline/docling-s3in-s3out.yaml +0 -0
  40. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/kfp_pipeline/docling_s3in_s3out.py +0 -0
  41. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/kfp_pipeline/docling_s3in_s3out_with_infer.py +0 -0
  42. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/kfp_pipeline/docling_s3in_s3out_with_infer.yaml +0 -0
  43. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/orchestrators/__init__.py +0 -0
  44. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/orchestrators/base_notifier.py +0 -0
  45. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/orchestrators/base_orchestrator.py +0 -0
  46. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/orchestrators/kfp/__init__.py +0 -0
  47. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/orchestrators/kfp/kfp_pipeline.py +0 -0
  48. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/orchestrators/kfp/notify.py +0 -0
  49. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/orchestrators/kfp/orchestrator.py +0 -0
  50. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/orchestrators/local/__init__.py +0 -0
  51. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/orchestrators/local/orchestrator.py +0 -0
  52. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/orchestrators/local/worker.py +0 -0
  53. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/orchestrators/rq/__init__.py +0 -0
  54. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/orchestrators/rq/orchestrator.py +0 -0
  55. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/orchestrators/rq/worker.py +0 -0
  56. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/py.typed +0 -0
  57. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/ray_job/__init__.py +0 -0
  58. {docling_jobkit-1.8.0 → docling_jobkit-1.8.1}/docling_jobkit/ray_job/main.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling-jobkit
3
- Version: 1.8.0
3
+ Version: 1.8.1
4
4
  Summary: Running a distributed job processing documents with Docling.
5
5
  Project-URL: Homepage, https://github.com/docling-project/docling-jobkit
6
6
  Project-URL: Repository, https://github.com/docling-project/docling-jobkit
@@ -31,7 +31,12 @@ from docling.datamodel.pipeline_options import (
31
31
  VlmPipelineOptions,
32
32
  )
33
33
  from docling.datamodel.pipeline_options_vlm_model import ApiVlmOptions, InlineVlmOptions
34
- from docling.document_converter import DocumentConverter, FormatOption, PdfFormatOption
34
+ from docling.document_converter import (
35
+ DocumentConverter,
36
+ FormatOption,
37
+ ImageFormatOption,
38
+ PdfFormatOption,
39
+ )
35
40
  from docling.models.factories import get_ocr_factory
36
41
  from docling.pipeline.vlm_pipeline import VlmPipeline
37
42
  from docling_core.types.doc import ImageRefMode
@@ -68,12 +73,28 @@ def _hash_pdf_format_option(pdf_format_option: PdfFormatOption) -> bytes:
68
73
  data["pipeline_options"] = pdf_format_option.pipeline_options.model_dump(
69
74
  serialize_as_any=True, mode="json"
70
75
  )
76
+ data["pipeline_options_type"] = (
77
+ f"{pdf_format_option.pipeline_options.__class__.__module__}."
78
+ f"{pdf_format_option.pipeline_options.__class__.__qualname__}"
79
+ )
80
+ else:
81
+ data["pipeline_options_type"] = None
71
82
 
72
83
  # Replace `pipeline_cls` with a string representation
73
- data["pipeline_cls"] = repr(data["pipeline_cls"])
84
+ pipeline_cls = pdf_format_option.pipeline_cls
85
+ data["pipeline_cls"] = (
86
+ f"{pipeline_cls.__module__}.{pipeline_cls.__qualname__}"
87
+ if pipeline_cls is not None
88
+ else "None"
89
+ )
74
90
 
75
91
  # Replace `backend` with a string representation
76
- data["backend"] = repr(data["backend"])
92
+ backend = pdf_format_option.backend
93
+ data["backend"] = (
94
+ f"{backend.__module__}.{backend.__qualname__}"
95
+ if backend is not None
96
+ else "None"
97
+ )
77
98
 
78
99
  # Serialize the dictionary to JSON with sorted keys to have consistent hashes
79
100
  serialized_data = json.dumps(data, sort_keys=True)
@@ -121,9 +142,19 @@ class DoclingConverterManager:
121
142
  @lru_cache(maxsize=cache_size)
122
143
  def _get_converter_from_hash(options_hash: bytes) -> DocumentConverter:
123
144
  pdf_format_option = self._options_map[options_hash]
145
+ image_format_option: FormatOption = pdf_format_option
146
+ if isinstance(pdf_format_option.pipeline_cls, type) and issubclass(
147
+ pdf_format_option.pipeline_cls, VlmPipeline
148
+ ):
149
+ image_format_option = ImageFormatOption(
150
+ pipeline_cls=pdf_format_option.pipeline_cls,
151
+ pipeline_options=pdf_format_option.pipeline_options,
152
+ backend_options=pdf_format_option.backend_options,
153
+ )
154
+
124
155
  format_options: dict[InputFormat, FormatOption] = {
125
156
  InputFormat.PDF: pdf_format_option,
126
- InputFormat.IMAGE: pdf_format_option,
157
+ InputFormat.IMAGE: image_format_option,
127
158
  }
128
159
 
129
160
  return DocumentConverter(format_options=format_options)
@@ -282,6 +313,27 @@ class DoclingConverterManager:
282
313
  request.vlm_pipeline_model_api.model_dump()
283
314
  )
284
315
 
316
+ pipeline_options.do_picture_classification = request.do_picture_classification
317
+ pipeline_options.do_picture_description = request.do_picture_description
318
+
319
+ if request.picture_description_local is not None:
320
+ pipeline_options.picture_description_options = (
321
+ PictureDescriptionVlmOptions.model_validate(
322
+ request.picture_description_local.model_dump()
323
+ )
324
+ )
325
+
326
+ if request.picture_description_api is not None:
327
+ pipeline_options.picture_description_options = (
328
+ PictureDescriptionApiOptions.model_validate(
329
+ request.picture_description_api.model_dump()
330
+ )
331
+ )
332
+
333
+ pipeline_options.picture_description_options.picture_area_threshold = (
334
+ request.picture_description_area_threshold
335
+ )
336
+
285
337
  return pipeline_options
286
338
 
287
339
  # Computes the PDF pipeline options and returns the PdfFormatOption and its hash
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "docling-jobkit"
7
- version = "1.8.0" # DO NOT EDIT, updated automatically
7
+ version = "1.8.1" # DO NOT EDIT, updated automatically
8
8
  description = "Running a distributed job processing documents with Docling."
9
9
  readme = "README.md"
10
10
  license = "MIT"
File without changes
File without changes