docling-core 1.6.3__py3-none-any.whl → 1.7.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of docling-core might be problematic. Click here for more details.
- docling_core/transforms/chunker/base.py +34 -5
- docling_core/transforms/chunker/hierarchical_chunker.py +36 -19
- docling_core/transforms/id_generator/__init__.py +12 -0
- docling_core/transforms/id_generator/base.py +30 -0
- docling_core/transforms/id_generator/doc_hash_id_generator.py +27 -0
- docling_core/transforms/id_generator/uuid_generator.py +34 -0
- docling_core/transforms/metadata_extractor/__init__.py +13 -0
- docling_core/transforms/metadata_extractor/base.py +59 -0
- docling_core/transforms/metadata_extractor/simple_metadata_extractor.py +59 -0
- docling_core/types/base.py +4 -1
- docling_core/types/doc/base.py +0 -1
- docling_core/types/experimental/__init__.py +30 -0
- docling_core/types/experimental/base.py +167 -0
- docling_core/types/experimental/document.py +1192 -0
- docling_core/types/experimental/labels.py +50 -0
- {docling_core-1.6.3.dist-info → docling_core-1.7.1.dist-info}/METADATA +1 -1
- {docling_core-1.6.3.dist-info → docling_core-1.7.1.dist-info}/RECORD +20 -9
- {docling_core-1.6.3.dist-info → docling_core-1.7.1.dist-info}/LICENSE +0 -0
- {docling_core-1.6.3.dist-info → docling_core-1.7.1.dist-info}/WHEEL +0 -0
- {docling_core-1.6.3.dist-info → docling_core-1.7.1.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,50 @@
|
|
|
1
|
+
"""Models for the labels types."""
|
|
2
|
+
|
|
3
|
+
from enum import Enum
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
class DocItemLabel(str, Enum):
|
|
7
|
+
"""DocItemLabel."""
|
|
8
|
+
|
|
9
|
+
# DocLayNet v2
|
|
10
|
+
CAPTION = "caption"
|
|
11
|
+
FOOTNOTE = "footnote"
|
|
12
|
+
FORMULA = "formula"
|
|
13
|
+
LIST_ITEM = "list_item"
|
|
14
|
+
PAGE_FOOTER = "page_footer"
|
|
15
|
+
PAGE_HEADER = "page_header"
|
|
16
|
+
PICTURE = "picture"
|
|
17
|
+
SECTION_HEADER = "section_header"
|
|
18
|
+
TABLE = "table"
|
|
19
|
+
TEXT = "text"
|
|
20
|
+
TITLE = "title"
|
|
21
|
+
DOCUMENT_INDEX = "document_index"
|
|
22
|
+
CODE = "code"
|
|
23
|
+
CHECKBOX_SELECTED = "checkbox_selected"
|
|
24
|
+
CHECKBOX_UNSELECTED = "checkbox_unselected"
|
|
25
|
+
FORM = "form"
|
|
26
|
+
KEY_VALUE_REGION = "key_value_region"
|
|
27
|
+
|
|
28
|
+
# Additional labels for markup-based formats (e.g. HTML, Word)
|
|
29
|
+
PARAGRAPH = "paragraph" # explicitly a paragraph and not arbitrary text
|
|
30
|
+
REFERENCE = "reference"
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
class GroupLabel(str, Enum):
|
|
34
|
+
"""GroupLabel."""
|
|
35
|
+
|
|
36
|
+
UNSPECIFIED = "unspecified"
|
|
37
|
+
LIST = "list" # group label for list container (not the list-items)
|
|
38
|
+
CHAPTER = "chapter"
|
|
39
|
+
SECTION = "section"
|
|
40
|
+
SHEET = "sheet"
|
|
41
|
+
SLIDE = "slide"
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
class TableCellLabel(str, Enum):
|
|
45
|
+
"""TableCellLabel."""
|
|
46
|
+
|
|
47
|
+
COLUMN_HEADER = "col_header"
|
|
48
|
+
ROW_HEADER = "row_header"
|
|
49
|
+
ROW_SECTION = "row_section"
|
|
50
|
+
BODY = "body"
|
|
@@ -15,17 +15,28 @@ docling_core/search/meta.py,sha256=wSurrsqdP1N3gQKx027fVdzVmc33a7Y6rPl-FClQvtA,3
|
|
|
15
15
|
docling_core/search/package.py,sha256=Lz2ml2eDy5t0ZimnGTq-DXHAn-f18w0bn4H5xrhs75A,1841
|
|
16
16
|
docling_core/transforms/__init__.py,sha256=P81y_oqkiTN4Ld5crh1gQ6BbHqqR6C6nBt9ACDd57ds,106
|
|
17
17
|
docling_core/transforms/chunker/__init__.py,sha256=xZ5ELOB8tbCoJY1dKUvOrFqxYyoHmmCNUSHxrrRi8a4,317
|
|
18
|
-
docling_core/transforms/chunker/base.py,sha256=
|
|
19
|
-
docling_core/transforms/chunker/hierarchical_chunker.py,sha256=
|
|
18
|
+
docling_core/transforms/chunker/base.py,sha256=5EW89CZf4SMB6Eh4yNjzYoNjn8S7oHH8NEpMck3Lcio,2078
|
|
19
|
+
docling_core/transforms/chunker/hierarchical_chunker.py,sha256=PQ1X-AdgbfitKmk8zUPcNBtwNjLxnjUEH7K3aPGFl-k,12677
|
|
20
|
+
docling_core/transforms/id_generator/__init__.py,sha256=7UoSyAcLsvw-RRrNjYXRVS4rIOUXjwqVpaQA-SSeINU,379
|
|
21
|
+
docling_core/transforms/id_generator/base.py,sha256=SufPsaZUfMpuITq7pMv5YtlLmtGTDgA4LWmjmhQuSM0,704
|
|
22
|
+
docling_core/transforms/id_generator/doc_hash_id_generator.py,sha256=SUw4FBhMZtbWCfc7oMucSwYvJTXqIPMn3yCXPRxtPCI,656
|
|
23
|
+
docling_core/transforms/id_generator/uuid_generator.py,sha256=t8Bky_1JQB9myX-PJGWvW_c4-NvtHPHab6b1NdS-bpU,929
|
|
24
|
+
docling_core/transforms/metadata_extractor/__init__.py,sha256=q_eAUcbaToEuYUPco4uiBO8vgTGSmZUC-r0mS7KbWh8,335
|
|
25
|
+
docling_core/transforms/metadata_extractor/base.py,sha256=7h_S6-buCVtvAvKQKLISjDqFV8D3brewiQ-geqlUriI,1467
|
|
26
|
+
docling_core/transforms/metadata_extractor/simple_metadata_extractor.py,sha256=ZRjDdXgFe8jPBNC_0ruJjQanabpkxceVsCJVVWVWlIg,1629
|
|
20
27
|
docling_core/types/__init__.py,sha256=6mrAEKRW85uHJwNQBufwjPcMWCjm3oocA6MaO4_NLgg,805
|
|
21
|
-
docling_core/types/base.py,sha256=
|
|
28
|
+
docling_core/types/base.py,sha256=PusJskRVL19y-hq0BgXr5e8--QEqSqLnFNJ8UbOqW88,8318
|
|
22
29
|
docling_core/types/doc/__init__.py,sha256=Pzj_8rft6SJTVTCHgXRwHtuZjL6LK_6dcBWjikL9biY,125
|
|
23
|
-
docling_core/types/doc/base.py,sha256=
|
|
30
|
+
docling_core/types/doc/base.py,sha256=ujko-oQKoXw6wjBn0Il2Khu3PyljHqYnUNh3mPDVJF8,14676
|
|
24
31
|
docling_core/types/doc/doc_ann.py,sha256=8pV2efUglw19jxl4_oqB__mSxjWvtGIcllyCdqA-b2s,1196
|
|
25
32
|
docling_core/types/doc/doc_ocr.py,sha256=6PC0C-OczF-MyfgRxEI1xs3PWgNOzi7i2yEQbTqZz0I,1387
|
|
26
33
|
docling_core/types/doc/doc_raw.py,sha256=Y69G6IiauNDaoT-5el4xo1ypWpnBJQ75akGGkCMTZSc,3888
|
|
27
34
|
docling_core/types/doc/document.py,sha256=AKp1kOo0tncf9FX3q7qRWQ2Jz_hZE44smZpyrtsRzY4,24104
|
|
28
35
|
docling_core/types/doc/tokens.py,sha256=uU_MYW_p7ypf7eYICFBvxdnVaPZ7CQnvZmbJ6oPrtEA,6134
|
|
36
|
+
docling_core/types/experimental/__init__.py,sha256=mpqa2soTcHHEKqkcSeYBbAHepg0OgVZNReKvPmGz2r4,587
|
|
37
|
+
docling_core/types/experimental/base.py,sha256=k04zvzNI7qo4HfKxLPCePKxCnerzXd582gvrVjF25SI,4225
|
|
38
|
+
docling_core/types/experimental/document.py,sha256=X3z4sjRmWytRbEmSCnKat4D9sYxSV7Olm1YNmG3c5Kg,37874
|
|
39
|
+
docling_core/types/experimental/labels.py,sha256=tpmvpmJuQyYMLhxAvJSVuFhDRh_zQNiP1WrQmNXKQzo,1224
|
|
29
40
|
docling_core/types/gen/__init__.py,sha256=C6TuCfvpSnSL5XDOFMcYHUY2-i08vvfOGRcdu6Af0pI,124
|
|
30
41
|
docling_core/types/gen/generic.py,sha256=l4CZ4_Lb8ONG36WNJWbKX5hGKvTh_yU-hXp5hsm7uVU,844
|
|
31
42
|
docling_core/types/nlp/__init__.py,sha256=hGcztAeVK7xkRBqRRvc4zbY4PGeJ0r0QrEsetnSx9nI,119
|
|
@@ -45,8 +56,8 @@ docling_core/utils/ds_generate_jsonschema.py,sha256=EhNQutqWJFWuN-yl9UUPFZ7DJTvG
|
|
|
45
56
|
docling_core/utils/file.py,sha256=VQgzjyvmJnAIHB6ex7ikcmbDAR4GA1ALreuO7Ubrp50,1895
|
|
46
57
|
docling_core/utils/validate.py,sha256=3FmnxnKTDZC5J9OGxCL3U3DGRl0t0bBV1NcySXswdas,2031
|
|
47
58
|
docling_core/utils/validators.py,sha256=fBdyWX4PvFh7o_d25ZTs4iwmeo75QTbrxsvXv2kXkTg,2777
|
|
48
|
-
docling_core-1.
|
|
49
|
-
docling_core-1.
|
|
50
|
-
docling_core-1.
|
|
51
|
-
docling_core-1.
|
|
52
|
-
docling_core-1.
|
|
59
|
+
docling_core-1.7.1.dist-info/LICENSE,sha256=2M9-6EoQ1sxFztTOkXGAtwUDJvnWaAHdB9BYWVwGkIw,1087
|
|
60
|
+
docling_core-1.7.1.dist-info/METADATA,sha256=C-LfCUGUnDZ6s3A79IhqbZch1v8zn09YY9M1jE51kao,5383
|
|
61
|
+
docling_core-1.7.1.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
62
|
+
docling_core-1.7.1.dist-info/entry_points.txt,sha256=XHhtJEkdUuLxXSNxLdFIzx_siQ3z2UFQEKp-P8VYAE4,189
|
|
63
|
+
docling_core-1.7.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|