docling-jobkit 1.7.1__tar.gz → 1.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58)
  1. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/PKG-INFO +3 -3
  2. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/convert/manager.py +18 -0
  3. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/pyproject.toml +3 -3
  4. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/.gitignore +0 -0
  5. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/LICENSE +0 -0
  6. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/README.md +0 -0
  7. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/__init__.py +0 -0
  8. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/cli/__init__.py +0 -0
  9. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/cli/local.py +0 -0
  10. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/connectors/__init__.py +0 -0
  11. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/connectors/google_drive_helper.py +0 -0
  12. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/connectors/google_drive_source_processor.py +0 -0
  13. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/connectors/google_drive_target_processor.py +0 -0
  14. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/connectors/http_source_processor.py +0 -0
  15. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/connectors/s3_helper.py +0 -0
  16. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/connectors/s3_source_processor.py +0 -0
  17. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/connectors/s3_target_processor.py +0 -0
  18. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/connectors/source_processor.py +0 -0
  19. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/connectors/source_processor_factory.py +0 -0
  20. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/connectors/target_processor.py +0 -0
  21. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/connectors/target_processor_factory.py +0 -0
  22. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/convert/__init__.py +0 -0
  23. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/convert/chunking.py +0 -0
  24. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/convert/results.py +0 -0
  25. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/convert/results_processor.py +0 -0
  26. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/datamodel/__init__.py +0 -0
  27. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/datamodel/callback.py +0 -0
  28. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/datamodel/chunking.py +0 -0
  29. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/datamodel/convert.py +0 -0
  30. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/datamodel/google_drive_coords.py +0 -0
  31. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/datamodel/http_inputs.py +0 -0
  32. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/datamodel/result.py +0 -0
  33. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/datamodel/s3_coords.py +0 -0
  34. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/datamodel/task.py +0 -0
  35. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/datamodel/task_meta.py +0 -0
  36. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/datamodel/task_sources.py +0 -0
  37. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/datamodel/task_targets.py +0 -0
  38. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/kfp_pipeline/__init__.py +0 -0
  39. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/kfp_pipeline/docling-s3in-s3out.yaml +0 -0
  40. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/kfp_pipeline/docling_s3in_s3out.py +0 -0
  41. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/kfp_pipeline/docling_s3in_s3out_with_infer.py +0 -0
  42. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/kfp_pipeline/docling_s3in_s3out_with_infer.yaml +0 -0
  43. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/orchestrators/__init__.py +0 -0
  44. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/orchestrators/base_notifier.py +0 -0
  45. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/orchestrators/base_orchestrator.py +0 -0
  46. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/orchestrators/kfp/__init__.py +0 -0
  47. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/orchestrators/kfp/kfp_pipeline.py +0 -0
  48. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/orchestrators/kfp/notify.py +0 -0
  49. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/orchestrators/kfp/orchestrator.py +0 -0
  50. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/orchestrators/local/__init__.py +0 -0
  51. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/orchestrators/local/orchestrator.py +0 -0
  52. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/orchestrators/local/worker.py +0 -0
  53. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/orchestrators/rq/__init__.py +0 -0
  54. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/orchestrators/rq/orchestrator.py +0 -0
  55. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/orchestrators/rq/worker.py +0 -0
  56. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/py.typed +0 -0
  57. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/ray_job/__init__.py +0 -0
  58. {docling_jobkit-1.7.1 → docling_jobkit-1.8.0}/docling_jobkit/ray_job/main.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling-jobkit
3
- Version: 1.7.1
3
+ Version: 1.8.0
4
4
  Summary: Running a distributed job processing documents with Docling.
5
5
  Project-URL: Homepage, https://github.com/docling-project/docling-jobkit
6
6
  Project-URL: Repository, https://github.com/docling-project/docling-jobkit
@@ -25,7 +25,7 @@ Classifier: Topic :: Software Development :: Libraries :: Python Modules
25
25
  Classifier: Typing :: Typed
26
26
  Requires-Python: >=3.10
27
27
  Requires-Dist: boto3~=1.35
28
- Requires-Dist: docling~=2.56
28
+ Requires-Dist: docling~=2.60
29
29
  Requires-Dist: fastparquet~=2024.11
30
30
  Requires-Dist: httpx~=0.28
31
31
  Requires-Dist: pandas~=2.2
@@ -45,7 +45,7 @@ Provides-Extra: rq
45
45
  Requires-Dist: msgpack~=1.1; extra == 'rq'
46
46
  Requires-Dist: rq~=2.4; extra == 'rq'
47
47
  Provides-Extra: vlm
48
- Requires-Dist: docling[vlm]~=2.53; extra == 'vlm'
48
+ Requires-Dist: docling[vlm]~=2.60; extra == 'vlm'
49
49
  Description-Content-Type: text/markdown
50
50
 
51
51
  # Docling Jobkit
@@ -50,6 +50,13 @@ class DoclingConverterManagerConfig(BaseModel):
50
50
  max_num_pages: int = sys.maxsize
51
51
  max_file_size: int = sys.maxsize
52
52
 
53
+ # Threading pipeline
54
+ queue_max_size: Optional[int] = None
55
+ ocr_batch_size: Optional[int] = None
56
+ layout_batch_size: Optional[int] = None
57
+ table_batch_size: Optional[int] = None
58
+ batch_polling_interval_seconds: Optional[float] = None
59
+
53
60
 
54
61
  # Custom serializer for PdfFormatOption
55
62
  # (model_dump_json does not work with some classes)
@@ -202,6 +209,17 @@ class DoclingConverterManager:
202
209
  request.picture_description_area_threshold
203
210
  )
204
211
 
212
+ # Forward the definition of the following attributes, if they are not none
213
+ for attr in (
214
+ "queue_max_size",
215
+ "ocr_batch_size",
216
+ "layout_batch_size",
217
+ "table_batch_size",
218
+ "batch_polling_interval_seconds",
219
+ ):
220
+ if value := getattr(self.config, attr):
221
+ setattr(pipeline_options, attr, value)
222
+
205
223
  return pipeline_options
206
224
 
207
225
  def _parse_backend(
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "docling-jobkit"
7
- version = "1.7.1" # DO NOT EDIT, updated automatically
7
+ version = "1.8.0" # DO NOT EDIT, updated automatically
8
8
  description = "Running a distributed job processing documents with Docling."
9
9
  readme = "README.md"
10
10
  license = "MIT"
@@ -41,7 +41,7 @@ classifiers = [
41
41
  ]
42
42
  requires-python = ">=3.10"
43
43
  dependencies = [
44
- "docling~=2.56",
44
+ "docling~=2.60",
45
45
  "pydantic~=2.10",
46
46
  "pydantic-settings~=2.4",
47
47
  "typer~=0.12",
@@ -55,7 +55,7 @@ dependencies = [
55
55
 
56
56
  [project.optional-dependencies]
57
57
  vlm = [
58
- "docling[vlm]~=2.53",
58
+ "docling[vlm]~=2.60",
59
59
  ]
60
60
  # asr = [
61
61
  # "docling[asr]~=2.53",
File without changes
File without changes