docling 2.32.0__py3-none-any.whl → 2.34.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docling/backend/docling_parse_backend.py +1 -1
- docling/backend/docling_parse_v2_backend.py +1 -1
- docling/backend/docling_parse_v4_backend.py +1 -1
- docling/backend/msword_backend.py +269 -12
- docling/backend/pypdfium2_backend.py +6 -1
- docling/datamodel/base_models.py +99 -2
- docling/datamodel/document.py +11 -2
- docling/models/layout_model.py +9 -0
- docling/models/page_assemble_model.py +1 -0
- docling/models/page_preprocessing_model.py +50 -1
- docling/models/tesseract_ocr_cli_model.py +85 -41
- docling/models/tesseract_ocr_model.py +52 -30
- docling/pipeline/standard_pdf_pipeline.py +28 -3
- docling/pipeline/vlm_pipeline.py +19 -21
- docling/utils/layout_postprocessor.py +10 -22
- docling/utils/ocr_utils.py +60 -0
- docling/utils/orientation.py +71 -0
- {docling-2.32.0.dist-info → docling-2.34.0.dist-info}/METADATA +2 -2
- {docling-2.32.0.dist-info → docling-2.34.0.dist-info}/RECORD +22 -21
- {docling-2.32.0.dist-info → docling-2.34.0.dist-info}/LICENSE +0 -0
- {docling-2.32.0.dist-info → docling-2.34.0.dist-info}/WHEEL +0 -0
- {docling-2.32.0.dist-info → docling-2.34.0.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,71 @@
|
|
1
|
+
from typing import Tuple
|
2
|
+
|
3
|
+
from docling_core.types.doc import BoundingBox, CoordOrigin
|
4
|
+
from docling_core.types.doc.page import BoundingRectangle
|
5
|
+
|
6
|
+
# Orientations, in degrees, handled by rotate_bounding_box; any other value
# (after reduction modulo 360) is rejected there with a ValueError that
# lists these values.
CLIPPED_ORIENTATIONS = [0, 90, 180, 270]
|
7
|
+
|
8
|
+
|
9
|
+
def rotate_bounding_box(
    bbox: BoundingBox, angle: int, im_size: Tuple[int, int]
) -> BoundingRectangle:
    """Express an axis-aligned box as a rectangle on a page turned by ``angle``.

    The box is first converted to TOPLEFT coordinates, then its four corners
    are computed for the requested orientation. Corner ``r_0`` is the
    bottom-left corner of the box and the remaining corners follow in
    counterclockwise order.

    Args:
        bbox: The box to rotate, in any coordinate origin.
        angle: Orientation in degrees. It is reduced modulo 360 and must then
            be one of 0, 90, 180 or 270.
        im_size: The two dimensions of the image the box belongs to.

    Returns:
        A ``BoundingRectangle`` in TOPLEFT coordinates holding the four
        rotated corners.

    Raises:
        ValueError: If ``angle`` modulo 360 is not 0, 90, 180 or 270.
    """
    # The box is left top width height in TOPLEFT coordinates
    # Bounding rectangle start with r_0 at the bottom left whatever the
    # coordinate system. Then other corners are found rotating counterclockwise
    bbox = bbox.to_top_left_origin(im_size[1])
    left, top, width, height = bbox.l, bbox.t, bbox.width, bbox.height
    # NOTE(review): im_size[1] is used as the page height for the origin
    # conversion above, yet it is unpacked here under the name `im_w`.
    # Confirm the intended (width, height) ordering against the callers.
    im_h, im_w = im_size
    angle = angle % 360
    if angle == 0:
        r_x0 = left
        r_y0 = top + height
        r_x1 = r_x0 + width
        r_y1 = r_y0
        r_x2 = r_x0 + width
        r_y2 = r_y0 - height
        r_x3 = r_x0
        r_y3 = r_y0 - height
    elif angle == 90:
        r_x0 = im_w - (top + height)
        r_y0 = left
        r_x1 = r_x0
        r_y1 = r_y0 + width
        r_x2 = r_x0 + height
        r_y2 = r_y0 + width
        # The fourth corner closes the rectangle: it shares x with r_2 and y
        # with r_0 (as in the other branches). It previously duplicated r_1,
        # which produced a degenerate rectangle.
        r_x3 = r_x0 + height
        r_y3 = r_y0
    elif angle == 180:
        r_x0 = im_h - left
        r_y0 = im_w - (top + height)
        r_x1 = r_x0 - width
        r_y1 = r_y0
        r_x2 = r_x0 - width
        r_y2 = r_y0 + height
        r_x3 = r_x0
        r_y3 = r_y0 + height
    elif angle == 270:
        r_x0 = top + height
        r_y0 = im_h - left
        r_x1 = r_x0
        r_y1 = r_y0 - width
        r_x2 = r_x0 - height
        r_y2 = r_y0 - width
        r_x3 = r_x0 - height
        r_y3 = r_y0
    else:
        msg = (
            f"invalid orientation {angle}, expected values in:"
            f" {sorted(CLIPPED_ORIENTATIONS)}"
        )
        raise ValueError(msg)
    return BoundingRectangle(
        r_x0=r_x0,
        r_y0=r_y0,
        r_x1=r_x1,
        r_y1=r_y1,
        r_x2=r_x2,
        r_y2=r_y2,
        r_x3=r_x3,
        r_y3=r_y3,
        coord_origin=CoordOrigin.TOPLEFT,
    )
|
@@ -1,6 +1,6 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: docling
|
3
|
-
Version: 2.32.0
|
3
|
+
Version: 2.34.0
|
4
4
|
Summary: SDK and CLI for parsing PDF, DOCX, HTML, and more, to a unified document representation for powering downstream workflows such as gen AI applications.
|
5
5
|
Home-page: https://github.com/docling-project/docling
|
6
6
|
License: MIT
|
@@ -29,7 +29,7 @@ Requires-Dist: accelerate (>=1.2.1,<2.0.0) ; (sys_platform != "darwin" or platfo
|
|
29
29
|
Requires-Dist: beautifulsoup4 (>=4.12.3,<5.0.0)
|
30
30
|
Requires-Dist: certifi (>=2024.7.4)
|
31
31
|
Requires-Dist: click (<8.2.0)
|
32
|
-
Requires-Dist: docling-core[chunking] (>=2.
|
32
|
+
Requires-Dist: docling-core[chunking] (>=2.29.0,<3.0.0)
|
33
33
|
Requires-Dist: docling-ibm-models (>=3.4.0,<4.0.0)
|
34
34
|
Requires-Dist: docling-parse (>=4.0.0,<5.0.0)
|
35
35
|
Requires-Dist: easyocr (>=1.7,<2.0)
|
@@ -3,9 +3,9 @@ docling/backend/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
3
3
|
docling/backend/abstract_backend.py,sha256=1lNxzwDTn303aXduPDVmTyXn-5ZIoWMLYqNxANGWmQQ,1658
|
4
4
|
docling/backend/asciidoc_backend.py,sha256=W-4MRcID6AU9Ax23q8FwDwGG-OOCrBoqcNf2Ch_WPUc,14041
|
5
5
|
docling/backend/csv_backend.py,sha256=2g9famYG2W-ID9jEdZPxc6O8QGv1vWQfjN8pL-QMBE0,4536
|
6
|
-
docling/backend/docling_parse_backend.py,sha256=
|
7
|
-
docling/backend/docling_parse_v2_backend.py,sha256=
|
8
|
-
docling/backend/docling_parse_v4_backend.py,sha256
|
6
|
+
docling/backend/docling_parse_backend.py,sha256=bVSPmmiVXdCVfe-eLtDhbPQKBjkFR8rZJoRxdWIMdYU,7998
|
7
|
+
docling/backend/docling_parse_v2_backend.py,sha256=R4YPCEs72GYg-Xc9VfizPv8QjtGmKOsQzVPNAU2RIK0,9376
|
8
|
+
docling/backend/docling_parse_v4_backend.py,sha256=aWh-fd-lnuRGVGC_DG17QUptIsArv5V1gJo8QFbB5Ys,6263
|
9
9
|
docling/backend/docx/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
10
10
|
docling/backend/docx/latex/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
11
11
|
docling/backend/docx/latex/latex_dict.py,sha256=tFJp4ScT_AkY2ON7nLEa560p601Jq2glcZvMKxxjn7w,6593
|
@@ -16,9 +16,9 @@ docling/backend/json/docling_json_backend.py,sha256=LlFMVoZrrCfVwbDuRbNN4Xg96Luj
|
|
16
16
|
docling/backend/md_backend.py,sha256=JkY1qTvQFXjKSZGfD-83d-fZelorUG_l6mpJdYGqvX8,17210
|
17
17
|
docling/backend/msexcel_backend.py,sha256=3j0WQfqDpgPXdPMCguefdv7arcNVDedPD6gl54cmLn8,18110
|
18
18
|
docling/backend/mspowerpoint_backend.py,sha256=RwqfvvzrtM56L9uf7PR9lvlHJ-LyYGpkS1iVxkTl72Q,17203
|
19
|
-
docling/backend/msword_backend.py,sha256=
|
19
|
+
docling/backend/msword_backend.py,sha256=iB2yRg8hXtET2-Wjkv5pq0p9Y1SGQYIVCcWtOtXUILU,44621
|
20
20
|
docling/backend/pdf_backend.py,sha256=KE9TMuFO5WX-o5A_DAd4tEaLi4HMZ4XjKdpllItVkWM,2238
|
21
|
-
docling/backend/pypdfium2_backend.py,sha256=
|
21
|
+
docling/backend/pypdfium2_backend.py,sha256=fUGRBupwTYftEgdIDWKphA2zdfb-SrUoUGENK6j-q-0,11002
|
22
22
|
docling/backend/xml/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
23
23
|
docling/backend/xml/jats_backend.py,sha256=ghGi9bHjx3BvaOtmzLw86-wZy4UxpQPOPQL4e73-BI8,24927
|
24
24
|
docling/backend/xml/uspto_backend.py,sha256=nyAMr5ht7dclxkVDwsKNeiOhLQrUtRLS8JdscB2AVJg,70924
|
@@ -28,8 +28,8 @@ docling/cli/main.py,sha256=D7WEY4x6pQCVFRy3peK9KUDOb0Y5IVc-vTDqPnHPK00,26138
|
|
28
28
|
docling/cli/models.py,sha256=9yLGp6QRJGpR86U3SjmWAXDt3MvBaJLLY4xDVdsu3O8,4160
|
29
29
|
docling/cli/tools.py,sha256=QhtRxQG0TVrfsMqdv5i7J0_qQy1ZZyWYnHPwJl7b5oY,322
|
30
30
|
docling/datamodel/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
31
|
-
docling/datamodel/base_models.py,sha256=
|
32
|
-
docling/datamodel/document.py,sha256=
|
31
|
+
docling/datamodel/base_models.py,sha256=QJlzGJKUAO0kqM6DO2RZKlFi-lL2MpY8qt3Wdm02Slw,10460
|
32
|
+
docling/datamodel/document.py,sha256=lvdCw36iykfSHqapvwRVD2pdnR9vmnYRfrGFNJuwbug,16011
|
33
33
|
docling/datamodel/pipeline_options.py,sha256=uwjBvK4egrgcF1_w4B5EDxpGnl4IgBzmxP7dJ7zm394,13400
|
34
34
|
docling/datamodel/settings.py,sha256=ajMz7Ao2m0ZGYkfArqTDDbiF89O408mtgeh06PUi0MA,1900
|
35
35
|
docling/document_converter.py,sha256=PRRr65nigQ3LZDl4G2fBMkOtJyswT7xyGt7fpUeDO3w,13849
|
@@ -47,10 +47,10 @@ docling/models/factories/ocr_factory.py,sha256=G5RkmkKvkl-ihpo6qSj8WC77VdlVSQ1s0
|
|
47
47
|
docling/models/factories/picture_description_factory.py,sha256=Ru3-TnVVEKf5O07C_UpGf2HCOHc7j20AJzfficw3agM,385
|
48
48
|
docling/models/hf_mlx_model.py,sha256=B_B4hFU-jU0g_DQtQD8w4Ejorn10mkDuFI93wR_WhGk,4897
|
49
49
|
docling/models/hf_vlm_model.py,sha256=SiPMTLghMUjJ66dA2yN4UujpLO6PiOhLEPInWtXV_5s,6912
|
50
|
-
docling/models/layout_model.py,sha256=
|
50
|
+
docling/models/layout_model.py,sha256=0Ro7IStAF8ACZLuKu7Gi9Cu96_TvGdxoHSYpz05nHVo,8212
|
51
51
|
docling/models/ocr_mac_model.py,sha256=A3TlEbvvwhkWiq9YARos3Y9yNcpPYQ7JGc_4hFtAK-8,5370
|
52
|
-
docling/models/page_assemble_model.py,sha256=
|
53
|
-
docling/models/page_preprocessing_model.py,sha256=
|
52
|
+
docling/models/page_assemble_model.py,sha256=TvN1naez7dUodLxpUUBzpuMCpqZBTf6YSpewxgjzmrg,6323
|
53
|
+
docling/models/page_preprocessing_model.py,sha256=FiPDMmkC1EWTxDjTGbJZH0ZMyXxIOCZDN4qHfoOEfuw,4998
|
54
54
|
docling/models/picture_description_api_model.py,sha256=kCuAFOGEuI5QsRul7Pc1LccxWN7WIvIUhXEmSICYegw,2332
|
55
55
|
docling/models/picture_description_base_model.py,sha256=FbBVXzAOB87xpJN28tuGCxoAdcf6mZNUOqJR7ljUg5g,2946
|
56
56
|
docling/models/picture_description_vlm_model.py,sha256=DiTjnehVy1n0N04xPUvZl8rx4TiNHzHn9Cnzy_ePGts,4177
|
@@ -59,28 +59,29 @@ docling/models/plugins/defaults.py,sha256=qslXGnRX07Z3GGttNriqaox0v0vXp4zs4KLurH
|
|
59
59
|
docling/models/rapid_ocr_model.py,sha256=Tq_1Egu5Hjx7Y69Vox17QTtRXztSyflB1fhN08CWQwY,5894
|
60
60
|
docling/models/readingorder_model.py,sha256=S9ru2ApY9sE-Uue3hptWHmbmElwo36bUbAikxCFpHYs,14574
|
61
61
|
docling/models/table_structure_model.py,sha256=1gxLaooK0IKMrnmS8nT1BItKqt1GAKghfpmLKb3i53g,12566
|
62
|
-
docling/models/tesseract_ocr_cli_model.py,sha256=
|
63
|
-
docling/models/tesseract_ocr_model.py,sha256=
|
62
|
+
docling/models/tesseract_ocr_cli_model.py,sha256=e55MkaDdsseKcfX5lxIt0iv5jR6pDFBzWBZHTvl2Jws,12653
|
63
|
+
docling/models/tesseract_ocr_model.py,sha256=vS4And5NHe_uLNb6ZBi2CQzWUITBdc1E1zlsojrSZpM,10561
|
64
64
|
docling/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
65
65
|
docling/pipeline/base_pipeline.py,sha256=DnuxAf7EQusdSRae0QUVth-0f2mSff8JZjX-2vazk00,8751
|
66
66
|
docling/pipeline/simple_pipeline.py,sha256=TXZOwR7hZRji462ZTIpte0VJjzbxvNVE8dbLFANDhSU,2253
|
67
|
-
docling/pipeline/standard_pdf_pipeline.py,sha256=
|
68
|
-
docling/pipeline/vlm_pipeline.py,sha256=
|
67
|
+
docling/pipeline/standard_pdf_pipeline.py,sha256=wCq0zq8xkiOdNnAEkSuJeELnBjOkLBASD9iQ5mVsSfc,11869
|
68
|
+
docling/pipeline/vlm_pipeline.py,sha256=ZW1WGd6jeLqTCWR0S0cj6H_qVMUXELaFCrJVpvZp6Co,9684
|
69
69
|
docling/py.typed,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
70
70
|
docling/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
71
71
|
docling/utils/accelerator_utils.py,sha256=ONNRrC8fH-8E93WUCNhfOq1t7WrQ1T7-YsmExTOY5f0,2292
|
72
72
|
docling/utils/api_image_request.py,sha256=_CgdzmPqdsyXmyYUFGLZcXcoH586qC6A1p5vsNbj1Q0,1416
|
73
73
|
docling/utils/export.py,sha256=VwVUnYDk3mhGmISDbVm306fwpGNnoojouStBD4UajXI,4673
|
74
74
|
docling/utils/glm_utils.py,sha256=TKOWQqWAHsX_w4fvoAA7_2xCi_urhnp1DsmjY8_sk5w,12274
|
75
|
-
docling/utils/layout_postprocessor.py,sha256=
|
75
|
+
docling/utils/layout_postprocessor.py,sha256=3WCmkPsPJ80xfWzAUeWb5L9BmuwJ79ztctvbbUs8AfI,24068
|
76
76
|
docling/utils/locks.py,sha256=RzqQtD5UispgV71pGN_nU6GYfeN11BN0Sh_Dq9ycqGo,52
|
77
77
|
docling/utils/model_downloader.py,sha256=ocvud3G3qlBQhzMo69Q3RJMnvq5HPZ2DwNbMuEp8RCs,4142
|
78
|
-
docling/utils/ocr_utils.py,sha256=
|
78
|
+
docling/utils/ocr_utils.py,sha256=AOaDAHr5S74d-IRVR_LKhKynUTIurAwLJ3wNeY58gPA,2326
|
79
|
+
docling/utils/orientation.py,sha256=xXlOfowL54FKwjsTFrM7y3ogk1wChLNn_-u74tYIf1s,2011
|
79
80
|
docling/utils/profiling.py,sha256=YaMGoB9MMZpagF9mb5ndoHj8Lpb9aIdb7El-Pl7IcFs,1753
|
80
81
|
docling/utils/utils.py,sha256=kJtIYuzXeOyJHYlxmLAo7dGM5rEsDa1i84qEsUj1nio,1908
|
81
82
|
docling/utils/visualization.py,sha256=tY2ylE2aiQKkmzlSLnFW-HTfFyqUUMguW18ldd1PLfo,2868
|
82
|
-
docling-2.
|
83
|
-
docling-2.
|
84
|
-
docling-2.
|
85
|
-
docling-2.
|
86
|
-
docling-2.
|
83
|
+
docling-2.34.0.dist-info/LICENSE,sha256=mBb7ErEcM8VS9OhiGHnQ2kk75HwPhr54W1Oiz3965MY,1088
|
84
|
+
docling-2.34.0.dist-info/METADATA,sha256=s1PANBKtKOaJPgUhrSpeiN0z-8Jx5VvplXLo-7z0sfs,10138
|
85
|
+
docling-2.34.0.dist-info/WHEEL,sha256=Nq82e9rUAnEjt98J6MlVmMCZb-t9cYE2Ir1kpBmnWfs,88
|
86
|
+
docling-2.34.0.dist-info/entry_points.txt,sha256=pIxel-UeVo1S7FhoNG5xgEfPjLZfBLi_N9TsGPtJSLo,144
|
87
|
+
docling-2.34.0.dist-info/RECORD,,
|
File without changes
|
File without changes
|
File without changes
|