docling 2.54.0__py3-none-any.whl → 2.55.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,13 +1,15 @@
1
1
  import base64
2
+ import json
2
3
  import logging
3
4
  from io import BytesIO
4
- from typing import Dict, Optional
5
+ from typing import Dict, List, Optional
5
6
 
6
7
  import requests
7
8
  from PIL import Image
8
9
  from pydantic import AnyUrl
9
10
 
10
11
  from docling.datamodel.base_models import OpenAiApiResponse
12
+ from docling.models.utils.generation_utils import GenerationStopper
11
13
 
12
14
  _log = logging.getLogger(__name__)
13
15
 
@@ -59,3 +61,107 @@ def api_image_request(
59
61
  api_resp = OpenAiApiResponse.model_validate_json(r.text)
60
62
  generated_text = api_resp.choices[0].message.content.strip()
61
63
  return generated_text
64
+
65
+
66
def api_image_request_streaming(
    image: Image.Image,
    prompt: str,
    url: AnyUrl,
    *,
    timeout: float = 20,
    headers: Optional[Dict[str, str]] = None,
    generation_stoppers: Optional[List[GenerationStopper]] = None,
    **params,
) -> str:
    """
    Stream a chat completion from an OpenAI-compatible server (e.g., vLLM).

    The image is PNG-encoded, embedded as a base64 data URL next to ``prompt`` in a
    single user message, and POSTed to ``url`` with ``"stream": True``. The response
    is parsed as SSE lines of the form 'data: {json}\\n\\n', terminated by
    'data: [DONE]'. Text deltas are accumulated; after each non-empty delta every
    stopper is consulted and, if one triggers, the text collected so far is returned
    and the HTTP connection is closed when the ``with`` block exits.

    Args:
        image: Image sent to the model.
        prompt: Text part of the user message.
        url: Chat-completions endpoint.
        timeout: Timeout handed to ``requests.post``.
        headers: Extra HTTP headers; they override the default ``Accept`` header.
        generation_stoppers: Stoppers checked as chunks arrive. ``None`` (default)
            or an empty list disables early stopping.
        **params: Extra fields merged into the JSON payload (e.g. ``temperature``).

    Returns:
        The concatenated generated text (not stripped).

    Raises:
        requests.HTTPError: If the server answers with an error status.
    """
    # A None default avoids sharing one mutable list object across calls.
    stoppers = generation_stoppers or []

    img_io = BytesIO()
    image.save(img_io, "PNG")
    image_b64 = base64.b64encode(img_io.getvalue()).decode("utf-8")

    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {"url": f"data:image/png;base64,{image_b64}"},
                },
                {"type": "text", "text": prompt},
            ],
        }
    ]

    payload = {
        "messages": messages,
        "stream": True,  # <-- critical for SSE streaming
        **params,
    }

    # Debug: log the payload to verify request parameters (e.g. temperature).
    # Guarded because serialising the payload (which embeds the whole base64 image)
    # is expensive and pointless when DEBUG logging is disabled.
    if _log.isEnabledFor(logging.DEBUG):
        _log.debug(
            "API streaming request payload: %s", json.dumps(payload, indent=2)
        )

    # Some servers require Accept: text/event-stream for SSE.
    # It's safe to set it; OpenAI-compatible servers tolerate it.
    hdrs = {"Accept": "text/event-stream", **(headers or {})}

    # Try to force temperature via header if server ignores payload parameter
    if "temperature" in params:
        hdrs["X-Temperature"] = str(params["temperature"])

    # Stream the HTTP response
    with requests.post(
        str(url), headers=hdrs, json=payload, timeout=timeout, stream=True
    ) as r:
        if not r.ok:
            _log.error(
                f"Error calling the API {url} in streaming mode. Response was {r.text}"
            )
        r.raise_for_status()

        # SSE streams are UTF-8 by specification. Without an explicit charset,
        # requests would fall back to ISO-8859-1 for text/* (garbling non-ASCII
        # output) or yield raw bytes when no encoding can be determined.
        r.encoding = "utf-8"

        full_text: List[str] = []
        for raw_line in r.iter_lines(decode_unicode=True):
            if not raw_line:  # keep-alives / blank lines
                continue
            if not raw_line.startswith("data:"):
                # Some proxies inject comments; ignore anything not starting with 'data:'
                continue

            data = raw_line[len("data:") :].strip()
            if data == "[DONE]":
                break

            try:
                obj = json.loads(data)
            except json.JSONDecodeError:
                _log.debug("Skipping non-JSON SSE chunk: %r", data[:200])
                continue

            # OpenAI-compatible delta format
            # obj["choices"][0]["delta"]["content"] may be None or missing (e.g., tool calls)
            try:
                delta = obj["choices"][0].get("delta") or {}
                piece = delta.get("content") or ""
            except (KeyError, IndexError, TypeError, AttributeError) as e:
                # TypeError/AttributeError cover null "choices"/choice entries and
                # payloads that are not JSON objects.
                _log.debug("Unexpected SSE chunk shape: %s", e)
                piece = ""

            if not piece:
                continue

            full_text.append(piece)
            if not stoppers:
                continue

            # Join once per chunk rather than once per stopper.
            text_so_far = "".join(full_text)
            for stopper in stoppers:
                # Respect stopper's lookback window. We use a simple string window
                # (the lookback value is applied as a character count) which works
                # with the GenerationStopper interface.
                lookback = max(1, stopper.lookback_tokens())
                if stopper.should_stop(text_so_far[-lookback:]):
                    # Return cleanly. The context manager closes the connection
                    # when we leave the 'with' block; vLLM/OpenAI-compatible
                    # servers are expected to detect the client disconnect and
                    # abort the request server-side.
                    return text_so_far

        return "".join(full_text)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: docling
3
- Version: 2.54.0
3
+ Version: 2.55.0
4
4
  Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
5
5
  Author-email: Christoph Auer <cau@zurich.ibm.com>, Michele Dolfi <dol@zurich.ibm.com>, Maxim Lysak <mly@zurich.ibm.com>, Nikos Livathinos <nli@zurich.ibm.com>, Ahmed Nassar <ahn@zurich.ibm.com>, Panos Vagenas <pva@zurich.ibm.com>, Peter Staar <taa@zurich.ibm.com>
6
6
  License-Expression: MIT
@@ -37,7 +37,7 @@ Requires-Dist: requests<3.0.0,>=2.32.2
37
37
  Requires-Dist: easyocr<2.0,>=1.7
38
38
  Requires-Dist: certifi>=2024.7.4
39
39
  Requires-Dist: rtree<2.0.0,>=1.3.0
40
- Requires-Dist: typer<0.17.0,>=0.12.5
40
+ Requires-Dist: typer<0.20.0,>=0.12.5
41
41
  Requires-Dist: python-docx<2.0.0,>=1.1.2
42
42
  Requires-Dist: python-pptx<2.0.0,>=1.0.2
43
43
  Requires-Dist: beautifulsoup4<5.0.0,>=4.12.3
@@ -5,17 +5,17 @@ docling/exceptions.py,sha256=K1WnCS1leK2JtMB5ewZWKkb0EaijFgl-tRzrO9ntgPM,134
5
5
  docling/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
6
6
  docling/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
7
7
  docling/backend/abstract_backend.py,sha256=1lNxzwDTn303aXduPDVmTyXn-5ZIoWMLYqNxANGWmQQ,1658
8
- docling/backend/asciidoc_backend.py,sha256=RDNLrPJHxROiM7-NQdZn3DdvAyiPAndbSWcZo9PbCKU,14417
8
+ docling/backend/asciidoc_backend.py,sha256=ARpMBzrNCV-x6g2I0KytDM3pGLac3z4ql3hDKi3FI04,14403
9
9
  docling/backend/csv_backend.py,sha256=2g9famYG2W-ID9jEdZPxc6O8QGv1vWQfjN8pL-QMBE0,4536
10
10
  docling/backend/docling_parse_backend.py,sha256=9rUo1vPxX6QLzGqF-2B2iEYglZg6YQ3Uea00XrLluTg,7918
11
11
  docling/backend/docling_parse_v2_backend.py,sha256=3ckTfke8IICjaImlIzc3TRhG7KDuxDDba0AuCEcjA-M,9500
12
12
  docling/backend/docling_parse_v4_backend.py,sha256=xCBbaaXjNNrOaod9tmBuCbe5mL_ipmTNG2XOxVbGG3w,7891
13
- docling/backend/html_backend.py,sha256=7I3BQSmC7P47jpzXHt3OuPNhtVedJiZVEjjLykyx5pY,42245
14
- docling/backend/md_backend.py,sha256=qCI7SD9hnWWGrkG_drpzQv2Z7DVBG4Tsq3hhTsYV790,22562
13
+ docling/backend/html_backend.py,sha256=r2m3aIKwwr8Vv2Fxri1FaZFvd4EWvTQlmSPwXeD79zg,47796
14
+ docling/backend/md_backend.py,sha256=zrOUYoIYudUfigwnXRQocb_M4G_ptYfblNgr6BNTYQw,22678
15
15
  docling/backend/mets_gbs_backend.py,sha256=EA8sY6tbmGiysKGYPPZiNlK-i7Adn8bLTo-7Ym15hTU,12774
16
16
  docling/backend/msexcel_backend.py,sha256=5JRbPwOjR1r45AMeIts1rj6InbOgLBf_CtAhvNPVmsQ,19157
17
17
  docling/backend/mspowerpoint_backend.py,sha256=wJgB2JStEPfD7MPpWQlpPN7bffPxaHFUnKD4wj8SLxU,15114
18
- docling/backend/msword_backend.py,sha256=kQI9hrx_lvHn__KdxW8MbvB78snoVzA_m4jXx6f_LJ8,54419
18
+ docling/backend/msword_backend.py,sha256=Jfd57hzG8iFVAzqsOAHe5jG8LCHAIBXJhQCW0tESnMM,54405
19
19
  docling/backend/noop_backend.py,sha256=EOPbD86FzZPX-K_DpNrJh0_lC0bZz--4DpG-OagDNGY,1688
20
20
  docling/backend/pdf_backend.py,sha256=Wcd1NSrAMjXK8VicTki5p-j-JLofklt07eF0kIG17_0,3361
21
21
  docling/backend/pypdfium2_backend.py,sha256=AYhWs9S8W_TkAK0-OkRmUNf4HUZl26FP7-XYjwU5zDk,14209
@@ -27,11 +27,11 @@ docling/backend/docx/latex/omml.py,sha256=4vh9FCbXh-Tb6KJGqNwzlMUMYEnnJgBtBI24dw
27
27
  docling/backend/json/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
28
28
  docling/backend/json/docling_json_backend.py,sha256=LlFMVoZrrCfVwbDuRbNN4Xg96Lujh4xxrTBt9jGhY9I,1984
29
29
  docling/backend/xml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
30
- docling/backend/xml/jats_backend.py,sha256=LPj33EFdi2MRCakkLWrRLlUAc-B-949f8zp5gKNvBcg,25238
31
- docling/backend/xml/uspto_backend.py,sha256=nyAMr5ht7dclxkVDwsKNeiOhLQrUtRLS8JdscB2AVJg,70924
30
+ docling/backend/xml/jats_backend.py,sha256=_BWpQQg3SlsHAOOj0v2qRJoVqaQzL91GqN1tK9LxqWo,29463
31
+ docling/backend/xml/uspto_backend.py,sha256=Tv4CE7V5_QwxTNJPl90CAd_mAbwaLGy8S6s6evh1Xow,70910
32
32
  docling/chunking/__init__.py,sha256=h83TDs0AuOV6oEPLAPrn9dpGKiU-2Vg6IRNo4cv6GDA,346
33
33
  docling/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
- docling/cli/main.py,sha256=J_hXHclzT-uNu-cuKNdlc3vwCnyDRxXrJ5L2LJofzeo,32729
34
+ docling/cli/main.py,sha256=UX-5fRGVP_yGxTQez0x1PNnaNKRgWdcXGoPCHy-0uFM,32887
35
35
  docling/cli/models.py,sha256=rw_2JfeJ-k_iOLpz3JfgL1QbJY__W9nE23nHdov6VfU,6252
36
36
  docling/cli/tools.py,sha256=QhtRxQG0TVrfsMqdv5i7J0_qQy1ZZyWYnHPwJl7b5oY,322
37
37
  docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -43,12 +43,12 @@ docling/datamodel/extraction.py,sha256=7dgvtK5SuvgfB8LHAwS1FwrW1kcMQJuJG0ol8uAQg
43
43
  docling/datamodel/layout_model_specs.py,sha256=GSkJ-Z_0PVgwWGi7C7TsxbzRjlrWS9ZrHJjHumv-Z5U,2339
44
44
  docling/datamodel/pipeline_options.py,sha256=28opZ3woXA8IKaG2-BHM-lmmi-gyuScCMHGxhlxGOsk,11290
45
45
  docling/datamodel/pipeline_options_asr_model.py,sha256=7X068xl-qpbyPxC7-TwX7Q6tLyZXGT5h1osZ_xLNLM0,1454
46
- docling/datamodel/pipeline_options_vlm_model.py,sha256=AcqqThSW74hwQ6x7pazzm57LnJiUqB7gQi5wFayGlbk,2628
46
+ docling/datamodel/pipeline_options_vlm_model.py,sha256=Szdq5_MhqQ8xBCvOUkdn_LLV29ZMQJcF4xnItYlkmXQ,3090
47
47
  docling/datamodel/settings.py,sha256=c0MTw6pO5be_BKxHKYl4SaBJAw_qL-aapxp-g5HHj1A,2084
48
- docling/datamodel/vlm_model_specs.py,sha256=UMXiTzWCXcx2BtF5slYfWhjRXAx0s1oiAvE-vCzrATo,9686
48
+ docling/datamodel/vlm_model_specs.py,sha256=9TTmihDEFcI-TY1jJ2GTnTcrGa3bLg0e6anN4gPtFgU,10035
49
49
  docling/models/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
50
- docling/models/api_vlm_model.py,sha256=-zisU32pgDRbychyG6-neB0qweNbPaYnLXwiGT7SEdI,2859
51
- docling/models/base_model.py,sha256=beMGyrpl-yYX3YnLzQkLfxMLxwmDWnbcFhkjbUlWJSU,7146
50
+ docling/models/api_vlm_model.py,sha256=iNQ9LiT031Mch-LHn8O2CskVXYkr4weEetZPxynU_9U,4236
51
+ docling/models/base_model.py,sha256=LSaJWkSaDyLBVB4Fv9fkw6kmJ67QnG0t32iGn_u2WjE,7256
52
52
  docling/models/base_ocr_model.py,sha256=kT8TylASOpPlY60rIG6VL6_eLVsfg5KvEVnZHzDWtR0,8193
53
53
  docling/models/code_formula_model.py,sha256=XRugm4EwifLRc-TrAk-glKlktJP-nAPneKh2EOovkJU,11308
54
54
  docling/models/document_picture_classifier.py,sha256=9JvoWeH5uQBC7levjM8zptk7UT-b8EQnD-2EnxTjTT4,6202
@@ -61,7 +61,7 @@ docling/models/picture_description_api_model.py,sha256=o3EkV5aHW_6WzE_fdj_VRnNCr
61
61
  docling/models/picture_description_base_model.py,sha256=kLthLhdlgwhootQ4_xhhcAk6A-vso5-qcsFJ3TcYfO0,2991
62
62
  docling/models/picture_description_vlm_model.py,sha256=Uja_BQSk7F-U1J2hm4yeLguirUzKYv1K8zRyw1IYomY,4150
63
63
  docling/models/rapid_ocr_model.py,sha256=anUVUwaj9Wubgu4FnHdYMuOVkQP_hJiLY1qRToelBoc,7700
64
- docling/models/readingorder_model.py,sha256=bZoXHaSwUsa8niSmJrbCuy784ixCeBXT-RQBUfgHJ4A,14925
64
+ docling/models/readingorder_model.py,sha256=_usJdpM4GMWeGGneEwLLxa9grIGQb0XnNMugV72jGbY,14911
65
65
  docling/models/table_structure_model.py,sha256=7g_mFf1YzfF8PXQfefNu6XYZu7TzJAn86zKb6IEUdCg,12518
66
66
  docling/models/tesseract_ocr_cli_model.py,sha256=I3Gn28Y-LD8OfvyCElN9fLiNgpo2sT0uMkVt258253s,12881
67
67
  docling/models/tesseract_ocr_model.py,sha256=GdI5Cjfi87qcehVbM3wdKRvKkl_F9A4bwTUbjXZCJYA,10745
@@ -72,12 +72,13 @@ docling/models/factories/picture_description_factory.py,sha256=Ru3-TnVVEKf5O07C_
72
72
  docling/models/plugins/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
73
73
  docling/models/plugins/defaults.py,sha256=OAHWW2tCcUXSyDMFxV_lXVRjSBJ1n6z-Eb3R8cDucU4,886
74
74
  docling/models/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
75
- docling/models/utils/hf_model_download.py,sha256=scBEfsM4yl7xPzqe7UtPvDh9RfQZQnuOhqQKilYBHls,984
75
+ docling/models/utils/generation_utils.py,sha256=0ZfMBMbolHAWjdbMza8FbD4_jQ4VY6ReUa4gqVLwMoU,5365
76
+ docling/models/utils/hf_model_download.py,sha256=VlKna9tLIVOGQkIRQBXfDimPIIyeRV7cFCbuOVmFQiU,1092
76
77
  docling/models/vlm_models_inline/__init__.py,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
77
- docling/models/vlm_models_inline/hf_transformers_model.py,sha256=wcBsUGHuPNoZn9pfQh4cyyZi1bFGCoF30T_MFVGDyyM,12128
78
- docling/models/vlm_models_inline/mlx_model.py,sha256=VP05v97mqzmaG4o9bOpJcxIlEqvNzAapJ15Zz3E3ACI,10169
78
+ docling/models/vlm_models_inline/hf_transformers_model.py,sha256=Vr6ZIKMVBGQrb0tXl0dVuqYMorDPEnNdF1axAkUpF1Q,14785
79
+ docling/models/vlm_models_inline/mlx_model.py,sha256=ae7hDMgBsMLkqulmbKDamGSSrLJcroYsP1HApJ90IZM,13471
79
80
  docling/models/vlm_models_inline/nuextract_transformers_model.py,sha256=jLNtlkMDheUyWot7Oqq-GHQIYzJ0fZrbReq5xCnYb9E,10506
80
- docling/models/vlm_models_inline/vllm_model.py,sha256=_EnK1nfpAPJky7aRlyp8SUIghiZOQO8AkDN_hHqXLZg,8615
81
+ docling/models/vlm_models_inline/vllm_model.py,sha256=vXClayYxPGX1jzQ1Rvf3vvwtW9khgApGvcRz4Qbyu7I,10293
81
82
  docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
82
83
  docling/pipeline/asr_pipeline.py,sha256=S55VHLoX3Mgauen1YP-PSUlI0LA1bgTgTkU-eC4U-dg,8481
83
84
  docling/pipeline/base_extraction_pipeline.py,sha256=GYrEz83IXv-tdIHjtNWxMBNczFwL8SZyf9vnPJ3STaI,2627
@@ -89,7 +90,7 @@ docling/pipeline/threaded_standard_pdf_pipeline.py,sha256=i67G5AOW7PIFCe5JS2sdBm
89
90
  docling/pipeline/vlm_pipeline.py,sha256=oMcdgzym_UQbVN3bajux_hENY40XGOnb6NU6Kwje2Os,15376
90
91
  docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
91
92
  docling/utils/accelerator_utils.py,sha256=DSajLxVx1JEVT0zt5de26llciLNlVfIDfSa2zYCFJzQ,2909
92
- docling/utils/api_image_request.py,sha256=_CgdzmPqdsyXmyYUFGLZcXcoH586qC6A1p5vsNbj1Q0,1416
93
+ docling/utils/api_image_request.py,sha256=kQDmTvQT6M2IgXnGYeoNflI6sLUG6WTCcEft94CRwWg,5379
93
94
  docling/utils/export.py,sha256=VwVUnYDk3mhGmISDbVm306fwpGNnoojouStBD4UajXI,4673
94
95
  docling/utils/glm_utils.py,sha256=TKOWQqWAHsX_w4fvoAA7_2xCi_urhnp1DsmjY8_sk5w,12274
95
96
  docling/utils/layout_postprocessor.py,sha256=sE9UR3Nv4iOk26uoIsN3bFioE7ScfAjj0orDBDneLXg,25166
@@ -100,9 +101,9 @@ docling/utils/orientation.py,sha256=jTyLxyT31FlOodZoBMlADHNQK2lAWKYVs5z7pXd_6Cg,
100
101
  docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
101
102
  docling/utils/utils.py,sha256=kJtIYuzXeOyJHYlxmLAo7dGM5rEsDa1i84qEsUj1nio,1908
102
103
  docling/utils/visualization.py,sha256=tY2ylE2aiQKkmzlSLnFW-HTfFyqUUMguW18ldd1PLfo,2868
103
- docling-2.54.0.dist-info/licenses/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
104
- docling-2.54.0.dist-info/METADATA,sha256=_GsdUYyPCv8XKeLeSO9Y0euAH8Eanr5i_y5kLvDEb1g,11252
105
- docling-2.54.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
106
- docling-2.54.0.dist-info/entry_points.txt,sha256=hzVlbeE0aMSTQ9S0-NTYN0Hmgsn6qL_EA2qX4UbkAuY,149
107
- docling-2.54.0.dist-info/top_level.txt,sha256=vkIywP-USjFyYo1AIRQbWQQaL3xB5jf8vkCYdTIfNic,8
108
- docling-2.54.0.dist-info/RECORD,,
104
+ docling-2.55.0.dist-info/licenses/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
105
+ docling-2.55.0.dist-info/METADATA,sha256=e1RK_bATZ2Q_Ie9kC6uHFCj99D7pkW678jxk_l0CHxk,11252
106
+ docling-2.55.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
107
+ docling-2.55.0.dist-info/entry_points.txt,sha256=hzVlbeE0aMSTQ9S0-NTYN0Hmgsn6qL_EA2qX4UbkAuY,149
108
+ docling-2.55.0.dist-info/top_level.txt,sha256=vkIywP-USjFyYo1AIRQbWQQaL3xB5jf8vkCYdTIfNic,8
109
+ docling-2.55.0.dist-info/RECORD,,