docling 2.32.0__py3-none-any.whl → 2.34.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,71 @@
1
+ from typing import Tuple
2
+
3
+ from docling_core.types.doc import BoundingBox, CoordOrigin
4
+ from docling_core.types.doc.page import BoundingRectangle
5
+
6
+ CLIPPED_ORIENTATIONS = [0, 90, 180, 270]
7
+
8
+
9
def rotate_bounding_box(
    bbox: BoundingBox, angle: int, im_size: Tuple[int, int]
) -> BoundingRectangle:
    """Rotate an axis-aligned box by a multiple of 90 degrees.

    The box is first converted to TOPLEFT coordinates and read as
    (left, top, width, height). The result is a four-corner rectangle
    expressed in TOPLEFT coordinates. Corner ``r_0`` is the corner that was
    the bottom left of the box before rotation; the remaining corners follow
    by walking around the box counterclockwise (as seen before rotation).

    Args:
        bbox: Box to rotate, in any coordinate origin.
        angle: Rotation in degrees. It is reduced modulo 360, so negative
            values are accepted (e.g. ``-90`` is handled as ``270``).
        im_size: Two-element image size. ``im_size[1]`` is passed as the
            height for the origin conversion, and the pair is unpacked as
            ``(im_h, im_w)`` for the rotation offsets.
            NOTE(review): those two uses name ``im_size[1]`` differently;
            presumably callers pass the ``(width, height)`` of the image the
            box was measured on -- confirm against callers before renaming.

    Returns:
        The rotated rectangle with ``coord_origin=CoordOrigin.TOPLEFT``.

    Raises:
        ValueError: If ``angle % 360`` is not one of 0, 90, 180 or 270.
    """
    bbox = bbox.to_top_left_origin(im_size[1])
    left, top, width, height = bbox.l, bbox.t, bbox.width, bbox.height
    im_h, im_w = im_size
    angle = angle % 360
    if angle == 0:
        r_x0 = left
        r_y0 = top + height
        r_x1 = r_x0 + width
        r_y1 = r_y0
        r_x2 = r_x0 + width
        r_y2 = r_y0 - height
        r_x3 = r_x0
        r_y3 = r_y0 - height
    elif angle == 90:
        r_x0 = im_w - (top + height)
        r_y0 = left
        r_x1 = r_x0
        r_y1 = r_y0 + width
        r_x2 = r_x0 + height
        r_y2 = r_y0 + width
        # r_3 closes the rectangle: r_3 = r_0 + (r_2 - r_1). It must not
        # coincide with r_1, otherwise the rectangle is degenerate.
        r_x3 = r_x0 + height
        r_y3 = r_y0
    elif angle == 180:
        r_x0 = im_h - left
        r_y0 = im_w - (top + height)
        r_x1 = r_x0 - width
        r_y1 = r_y0
        r_x2 = r_x0 - width
        r_y2 = r_y0 + height
        r_x3 = r_x0
        r_y3 = r_y0 + height
    elif angle == 270:
        r_x0 = top + height
        r_y0 = im_h - left
        r_x1 = r_x0
        r_y1 = r_y0 - width
        r_x2 = r_x0 - height
        r_y2 = r_y0 - width
        r_x3 = r_x0 - height
        r_y3 = r_y0
    else:
        msg = (
            f"invalid orientation {angle}, expected values in:"
            f" {sorted(CLIPPED_ORIENTATIONS)}"
        )
        raise ValueError(msg)
    return BoundingRectangle(
        r_x0=r_x0,
        r_y0=r_y0,
        r_x1=r_x1,
        r_y1=r_y1,
        r_x2=r_x2,
        r_y2=r_y2,
        r_x3=r_x3,
        r_y3=r_y3,
        coord_origin=CoordOrigin.TOPLEFT,
    )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling
3
- Version: 2.32.0
3
+ Version: 2.34.0
4
4
  Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
5
5
  Home-page: https://github.com/docling-project/docling
6
6
  License: MIT
@@ -29,7 +29,7 @@ Requires-Dist: accelerate (>=1.2.1,<2.0.0) ; (sys_platform != "darwin" or platfo
29
29
  Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
30
30
  Requires-Dist: certifi (>=2024.7.4)
31
31
  Requires-Dist: click (<8.2.0)
32
- Requires-Dist: docling-core[chunking] (>=2.26.0,<3.0.0)
32
+ Requires-Dist: docling-core[chunking] (>=2.29.0,<3.0.0)
33
33
  Requires-Dist: docling-ibm-models (>=3.4.0,<4.0.0)
34
34
  Requires-Dist: docling-parse (>=4.0.0,<5.0.0)
35
35
  Requires-Dist: easyocr (>=1.7,<2.0)
@@ -3,9 +3,9 @@ docling/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
3
  docling/backend/abstract_backend.py,sha256=1lNxzwDTn303aXduPDVmTyXn-5ZIoWMLYqNxANGWmQQ,1658
4
4
  docling/backend/asciidoc_backend.py,sha256=W-4MRcID6AU9Ax23q8FwDwGG-OOCrBoqcNf2Ch_WPUc,14041
5
5
  docling/backend/csv_backend.py,sha256=2g9famYG2W-ID9jEdZPxc6O8QGv1vWQfjN8pL-QMBE0,4536
6
- docling/backend/docling_parse_backend.py,sha256=V_CsUdN5RkGQBBq7A_ReAiUW4CQVh0-1Ur157Ozurdg,8017
7
- docling/backend/docling_parse_v2_backend.py,sha256=6fokgqb1hMbZua33gL46EFamrwPTC7ms6ZuEHw-Dv28,9395
8
- docling/backend/docling_parse_v4_backend.py,sha256=-WJZs0IsdN6blhkvTS1eh_qhujYLyJ3XcOMqS6AaXxg,6282
6
+ docling/backend/docling_parse_backend.py,sha256=bVSPmmiVXdCVfe-eLtDhbPQKBjkFR8rZJoRxdWIMdYU,7998
7
+ docling/backend/docling_parse_v2_backend.py,sha256=R4YPCEs72GYg-Xc9VfizPv8QjtGmKOsQzVPNAU2RIK0,9376
8
+ docling/backend/docling_parse_v4_backend.py,sha256=aWh-fd-lnuRGVGC_DG17QUptIsArv5V1gJo8QFbB5Ys,6263
9
9
  docling/backend/docx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
10
  docling/backend/docx/latex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
11
  docling/backend/docx/latex/latex_dict.py,sha256=tFJp4ScT_AkY2ON7nLEa560p601Jq2glcZvMKxxjn7w,6593
@@ -16,9 +16,9 @@ docling/backend/json/docling_json_backend.py,sha256=LlFMVoZrrCfVwbDuRbNN4Xg96Luj
16
16
  docling/backend/md_backend.py,sha256=JkY1qTvQFXjKSZGfD-83d-fZelorUG_l6mpJdYGqvX8,17210
17
17
  docling/backend/msexcel_backend.py,sha256=3j0WQfqDpgPXdPMCguefdv7arcNVDedPD6gl54cmLn8,18110
18
18
  docling/backend/mspowerpoint_backend.py,sha256=RwqfvvzrtM56L9uf7PR9lvlHJ-LyYGpkS1iVxkTl72Q,17203
19
- docling/backend/msword_backend.py,sha256=lVVMNwt0WIl4RD5wAf8pc8bJsb60x1BA8hTTkVmEVa8,32477
19
+ docling/backend/msword_backend.py,sha256=iB2yRg8hXtET2-Wjkv5pq0p9Y1SGQYIVCcWtOtXUILU,44621
20
20
  docling/backend/pdf_backend.py,sha256=KE9TMuFO5WX-o5A_DAd4tEaLi4HMZ4XjKdpllItVkWM,2238
21
- docling/backend/pypdfium2_backend.py,sha256=pX8f0WbUb0KTDTKyQuLzP_lgHHubyGXWD33vmpefPy8,10805
21
+ docling/backend/pypdfium2_backend.py,sha256=fUGRBupwTYftEgdIDWKphA2zdfb-SrUoUGENK6j-q-0,11002
22
22
  docling/backend/xml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
23
  docling/backend/xml/jats_backend.py,sha256=ghGi9bHjx3BvaOtmzLw86-wZy4UxpQPOPQL4e73-BI8,24927
24
24
  docling/backend/xml/uspto_backend.py,sha256=nyAMr5ht7dclxkVDwsKNeiOhLQrUtRLS8JdscB2AVJg,70924
@@ -28,8 +28,8 @@ docling/cli/main.py,sha256=D7WEY4x6pQCVFRy3peK9KUDOb0Y5IVc-vTDqPnHPK00,26138
28
28
  docling/cli/models.py,sha256=9yLGp6QRJGpR86U3SjmWAXDt3MvBaJLLY4xDVdsu3O8,4160
29
29
  docling/cli/tools.py,sha256=QhtRxQG0TVrfsMqdv5i7J0_qQy1ZZyWYnHPwJl7b5oY,322
30
30
  docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
- docling/datamodel/base_models.py,sha256=3BmGoV2HLXOWFuRHFAa42YnWceh-JEpcLXzfFz9AD9Y,7943
32
- docling/datamodel/document.py,sha256=_0Z4zUgCB5677ZW8Y7C1fv75enLZJOJUjcUkGTSiTBA,15553
31
+ docling/datamodel/base_models.py,sha256=QJlzGJKUAO0kqM6DO2RZKlFi-lL2MpY8qt3Wdm02Slw,10460
32
+ docling/datamodel/document.py,sha256=lvdCw36iykfSHqapvwRVD2pdnR9vmnYRfrGFNJuwbug,16011
33
33
  docling/datamodel/pipeline_options.py,sha256=uwjBvK4egrgcF1_w4B5EDxpGnl4IgBzmxP7dJ7zm394,13400
34
34
  docling/datamodel/settings.py,sha256=ajMz7Ao2m0ZGYkfArqTDDbiF89O408mtgeh06PUi0MA,1900
35
35
  docling/document_converter.py,sha256=PRRr65nigQ3LZDl4G2fBMkOtJyswT7xyGt7fpUeDO3w,13849
@@ -47,10 +47,10 @@ docling/models/factories/ocr_factory.py,sha256=G5RkmkKvkl-ihpo6qSj8WC77VdlVSQ1s0
47
47
  docling/models/factories/picture_description_factory.py,sha256=Ru3-TnVVEKf5O07C_UpGf2HCOHc7j20AJzfficw3agM,385
48
48
  docling/models/hf_mlx_model.py,sha256=B_B4hFU-jU0g_DQtQD8w4Ejorn10mkDuFI93wR_WhGk,4897
49
49
  docling/models/hf_vlm_model.py,sha256=SiPMTLghMUjJ66dA2yN4UujpLO6PiOhLEPInWtXV_5s,6912
50
- docling/models/layout_model.py,sha256=0fiJXJ4aPmcMsYY7rbN9LJ2mZ0_8G0ODY9kyNTAN3Ws,7823
50
+ docling/models/layout_model.py,sha256=0Ro7IStAF8ACZLuKu7Gi9Cu96_TvGdxoHSYpz05nHVo,8212
51
51
  docling/models/ocr_mac_model.py,sha256=A3TlEbvvwhkWiq9YARos3Y9yNcpPYQ7JGc_4hFtAK-8,5370
52
- docling/models/page_assemble_model.py,sha256=GO7JI1D6T6EkSW94cLQobPGNQUahkxQqTPRwj5CnmFE,6304
53
- docling/models/page_preprocessing_model.py,sha256=6pOGXiFQ-oz06UmJdcaYMdVyfZ0YVLWS6efGcx7Mxws,3105
52
+ docling/models/page_assemble_model.py,sha256=TvN1naez7dUodLxpUUBzpuMCpqZBTf6YSpewxgjzmrg,6323
53
+ docling/models/page_preprocessing_model.py,sha256=FiPDMmkC1EWTxDjTGbJZH0ZMyXxIOCZDN4qHfoOEfuw,4998
54
54
  docling/models/picture_description_api_model.py,sha256=kCuAFOGEuI5QsRul7Pc1LccxWN7WIvIUhXEmSICYegw,2332
55
55
  docling/models/picture_description_base_model.py,sha256=FbBVXzAOB87xpJN28tuGCxoAdcf6mZNUOqJR7ljUg5g,2946
56
56
  docling/models/picture_description_vlm_model.py,sha256=DiTjnehVy1n0N04xPUvZl8rx4TiNHzHn9Cnzy_ePGts,4177
@@ -59,28 +59,29 @@ docling/models/plugins/defaults.py,sha256=qslXGnRX07Z3GGttNriqaox0v0vXp4zs4KLurH
59
59
  docling/models/rapid_ocr_model.py,sha256=Tq_1Egu5Hjx7Y69Vox17QTtRXztSyflB1fhN08CWQwY,5894
60
60
  docling/models/readingorder_model.py,sha256=S9ru2ApY9sE-Uue3hptWHmbmElwo36bUbAikxCFpHYs,14574
61
61
  docling/models/table_structure_model.py,sha256=1gxLaooK0IKMrnmS8nT1BItKqt1GAKghfpmLKb3i53g,12566
62
- docling/models/tesseract_ocr_cli_model.py,sha256=LXYUCMQAPxQA2pY3zs9wcPSrAHHorTffSmIIWgltoaw,10234
63
- docling/models/tesseract_ocr_model.py,sha256=72009TJL_7tXTEnhlsGRiw_KibrQ0LjZlCBtW8NtwUc,9339
62
+ docling/models/tesseract_ocr_cli_model.py,sha256=e55MkaDdsseKcfX5lxIt0iv5jR6pDFBzWBZHTvl2Jws,12653
63
+ docling/models/tesseract_ocr_model.py,sha256=vS4And5NHe_uLNb6ZBi2CQzWUITBdc1E1zlsojrSZpM,10561
64
64
  docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
65
65
  docling/pipeline/base_pipeline.py,sha256=DnuxAf7EQusdSRae0QUVth-0f2mSff8JZjX-2vazk00,8751
66
66
  docling/pipeline/simple_pipeline.py,sha256=TXZOwR7hZRji462ZTIpte0VJjzbxvNVE8dbLFANDhSU,2253
67
- docling/pipeline/standard_pdf_pipeline.py,sha256=iNZMMGiHTwV6I4u_jjqXhVJ_DiPn_O9qnnee3PQxidc,10773
68
- docling/pipeline/vlm_pipeline.py,sha256=g3bxPEqxK8x-B5S6pOpNNo5GxCMCRDZgPJUFqsBA1eg,9720
67
+ docling/pipeline/standard_pdf_pipeline.py,sha256=wCq0zq8xkiOdNnAEkSuJeELnBjOkLBASD9iQ5mVsSfc,11869
68
+ docling/pipeline/vlm_pipeline.py,sha256=ZW1WGd6jeLqTCWR0S0cj6H_qVMUXELaFCrJVpvZp6Co,9684
69
69
  docling/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
70
70
  docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
71
71
  docling/utils/accelerator_utils.py,sha256=ONNRrC8fH-8E93WUCNhfOq1t7WrQ1T7-YsmExTOY5f0,2292
72
72
  docling/utils/api_image_request.py,sha256=_CgdzmPqdsyXmyYUFGLZcXcoH586qC6A1p5vsNbj1Q0,1416
73
73
  docling/utils/export.py,sha256=VwVUnYDk3mhGmISDbVm306fwpGNnoojouStBD4UajXI,4673
74
74
  docling/utils/glm_utils.py,sha256=TKOWQqWAHsX_w4fvoAA7_2xCi_urhnp1DsmjY8_sk5w,12274
75
- docling/utils/layout_postprocessor.py,sha256=x7exVG3HYzV9M_O78FfyoG43Y2L7PPMMydvSNwjqh8s,24528
75
+ docling/utils/layout_postprocessor.py,sha256=3WCmkPsPJ80xfWzAUeWb5L9BmuwJ79ztctvbbUs8AfI,24068
76
76
  docling/utils/locks.py,sha256=RzqQtD5UispgV71pGN_nU6GYfeN11BN0Sh_Dq9ycqGo,52
77
77
  docling/utils/model_downloader.py,sha256=ocvud3G3qlBQhzMo69Q3RJMnvq5HPZ2DwNbMuEp8RCs,4142
78
- docling/utils/ocr_utils.py,sha256=F7iOOjqolUcImUzir4qjDQd4QWSO3s6JC4WRn3U7uY4,263
78
+ docling/utils/ocr_utils.py,sha256=AOaDAHr5S74d-IRVR_LKhKynUTIurAwLJ3wNeY58gPA,2326
79
+ docling/utils/orientation.py,sha256=xXlOfowL54FKwjsTFrM7y3ogk1wChLNn_-u74tYIf1s,2011
79
80
  docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
80
81
  docling/utils/utils.py,sha256=kJtIYuzXeOyJHYlxmLAo7dGM5rEsDa1i84qEsUj1nio,1908
81
82
  docling/utils/visualization.py,sha256=tY2ylE2aiQKkmzlSLnFW-HTfFyqUUMguW18ldd1PLfo,2868
82
- docling-2.32.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
83
- docling-2.32.0.dist-info/METADATA,sha256=Nd6x_-yL4ghk90c6Z--1nrkq6_8TpojB6K9iyDYQ2KM,10138
84
- docling-2.32.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
85
- docling-2.32.0.dist-info/entry_points.txt,sha256=pIxel-UeVo1S7FhoNG5xgEfPjLZfBLi_N9TsGPtJSLo,144
86
- docling-2.32.0.dist-info/RECORD,,
83
+ docling-2.34.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
84
+ docling-2.34.0.dist-info/METADATA,sha256=s1PANBKtKOaJPgUhrSpeiN0z-8Jx5VvplXLo-7z0sfs,10138
85
+ docling-2.34.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
86
+ docling-2.34.0.dist-info/entry_points.txt,sha256=pIxel-UeVo1S7FhoNG5xgEfPjLZfBLi_N9TsGPtJSLo,144
87
+ docling-2.34.0.dist-info/RECORD,,