docling-core 2.0.0__tar.gz → 2.0.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

Files changed (56)
  1. {docling_core-2.0.0 → docling_core-2.0.1}/PKG-INFO +12 -12
  2. {docling_core-2.0.0 → docling_core-2.0.1}/README.md +11 -11
  3. docling_core-2.0.1/docling_core/types/__init__.py +10 -0
  4. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/types/doc/document.py +1 -0
  5. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/utils/generate_docs.py +1 -1
  6. {docling_core-2.0.0 → docling_core-2.0.1}/pyproject.toml +1 -1
  7. docling_core-2.0.0/docling_core/types/__init__.py +0 -29
  8. {docling_core-2.0.0 → docling_core-2.0.1}/LICENSE +0 -0
  9. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/__init__.py +0 -0
  10. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/py.typed +0 -0
  11. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/resources/schemas/doc/ANN.json +0 -0
  12. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/resources/schemas/doc/DOC.json +0 -0
  13. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/resources/schemas/doc/OCR-output.json +0 -0
  14. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/resources/schemas/doc/RAW.json +0 -0
  15. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/resources/schemas/generated/ccs_document_schema.json +0 -0
  16. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/resources/schemas/generated/minimal_document_schema_flat.json +0 -0
  17. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/resources/schemas/search/search_doc_mapping.json +0 -0
  18. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/resources/schemas/search/search_doc_mapping_v2.json +0 -0
  19. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/search/__init__.py +0 -0
  20. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/search/json_schema_to_search_mapper.py +0 -0
  21. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/search/mapping.py +0 -0
  22. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/search/meta.py +0 -0
  23. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/search/package.py +0 -0
  24. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/transforms/__init__.py +0 -0
  25. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/transforms/chunker/__init__.py +0 -0
  26. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/transforms/chunker/base.py +0 -0
  27. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/transforms/chunker/hierarchical_chunker.py +0 -0
  28. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/types/base.py +0 -0
  29. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/types/doc/__init__.py +0 -0
  30. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/types/doc/base.py +0 -0
  31. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/types/doc/labels.py +0 -0
  32. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/types/gen/__init__.py +0 -0
  33. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/types/gen/generic.py +0 -0
  34. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/types/legacy_doc/__init__.py +0 -0
  35. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/types/legacy_doc/base.py +0 -0
  36. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/types/legacy_doc/doc_ann.py +0 -0
  37. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/types/legacy_doc/doc_ocr.py +0 -0
  38. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/types/legacy_doc/doc_raw.py +0 -0
  39. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/types/legacy_doc/document.py +0 -0
  40. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/types/legacy_doc/tokens.py +0 -0
  41. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/types/nlp/__init__.py +0 -0
  42. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/types/nlp/qa.py +0 -0
  43. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/types/nlp/qa_labels.py +0 -0
  44. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/types/rec/__init__.py +0 -0
  45. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/types/rec/attribute.py +0 -0
  46. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/types/rec/base.py +0 -0
  47. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/types/rec/predicate.py +0 -0
  48. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/types/rec/record.py +0 -0
  49. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/types/rec/statement.py +0 -0
  50. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/types/rec/subject.py +0 -0
  51. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/utils/__init__.py +0 -0
  52. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/utils/alias.py +0 -0
  53. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/utils/file.py +0 -0
  54. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/utils/generate_jsonschema.py +0 -0
  55. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/utils/validate.py +0 -0
  56. {docling_core-2.0.0 → docling_core-2.0.1}/docling_core/utils/validators.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: docling-core
3
- Version: 2.0.0
3
+ Version: 2.0.1
4
4
  Summary: A python library to define and validate data types in Docling.
5
5
  Home-page: https://ds4sd.github.io/
6
6
  License: MIT
@@ -72,20 +72,20 @@ poetry run pytest test
72
72
  - You can validate your JSON objects using the pydantic class definition.
73
73
 
74
74
  ```py
75
- from docling_core.types import Document
75
+ from docling_core.types import DoclingDocument
76
76
 
77
77
  data_dict = {...} # here the object you want to validate, as a dictionary
78
- Document.model_validate(data_dict)
78
+ DoclingDocument.model_validate(data_dict)
79
79
 
80
80
  data_str = {...} # here the object as a JSON string
81
- Document.model_validate_json(data_str)
81
+ DoclingDocument.model_validate_json(data_str)
82
82
  ```
83
83
 
84
84
  - You can generate the JSON schema of a model with the script `generate_jsonschema`.
85
85
 
86
86
  ```py
87
- # for the `Document` type
88
- generate_jsonschema Document
87
+ # for the `DoclingDocument` type
88
+ generate_jsonschema DoclingDocument
89
89
 
90
90
  # for the use `Record` type
91
91
  generate_jsonschema Record
@@ -93,16 +93,16 @@ poetry run pytest test
93
93
 
94
94
  ## Documentation
95
95
 
96
- Docling supports 3 main data types:
96
+ Docling Core contains 3 top-level data types:
97
97
 
98
- - **Document** for publications like books, articles, reports, or patents. When Docling converts an unstructured PDF document, the generated JSON follows this schema.
99
- The Document type also models the metadata that may be attached to the converted document.
100
- Check [Document](docs/Document.json) for the full JSON schema.
98
+ - **DoclingDocument** for publications like books, articles, reports, or patents. When Docling converts an unstructured PDF document, the generated JSON follows this schema.
99
+ The DoclingDocument type also models the metadata that may be attached to the converted document.
100
+ Check [DoclingDocument](docs/DoclingDocument.json) for the full JSON schema.
101
101
  - **Record** for structured database records, centered on an entity or _subject_ that is provided with a list of attributes.
102
102
  Related to records, the statements can represent annotations on text by Natural Language Processing (NLP) tools.
103
- Check [Record](docs/Record.json) for the full JSON schema.
103
+ Check [Record](docs/Record.json) for the full JSON schema.
104
104
  - **Generic** for any data representation, ensuring minimal configuration and maximum flexibility.
105
- Check [Generic](docs/Generic.json) for the full JSON schema.
105
+ Check [Generic](docs/Generic.json) for the full JSON schema.
106
106
 
107
107
  The data schemas are defined using [pydantic](https://pydantic-docs.helpmanual.io/) models, which provide built-in processes to support the creation of data that adhere to those models.
108
108
 
@@ -36,20 +36,20 @@ poetry run pytest test
36
36
  - You can validate your JSON objects using the pydantic class definition.
37
37
 
38
38
  ```py
39
- from docling_core.types import Document
39
+ from docling_core.types import DoclingDocument
40
40
 
41
41
  data_dict = {...} # here the object you want to validate, as a dictionary
42
- Document.model_validate(data_dict)
42
+ DoclingDocument.model_validate(data_dict)
43
43
 
44
44
  data_str = {...} # here the object as a JSON string
45
- Document.model_validate_json(data_str)
45
+ DoclingDocument.model_validate_json(data_str)
46
46
  ```
47
47
 
48
48
  - You can generate the JSON schema of a model with the script `generate_jsonschema`.
49
49
 
50
50
  ```py
51
- # for the `Document` type
52
- generate_jsonschema Document
51
+ # for the `DoclingDocument` type
52
+ generate_jsonschema DoclingDocument
53
53
 
54
54
  # for the use `Record` type
55
55
  generate_jsonschema Record
@@ -57,16 +57,16 @@ poetry run pytest test
57
57
 
58
58
  ## Documentation
59
59
 
60
- Docling supports 3 main data types:
60
+ Docling Core contains 3 top-level data types:
61
61
 
62
- - **Document** for publications like books, articles, reports, or patents. When Docling converts an unstructured PDF document, the generated JSON follows this schema.
63
- The Document type also models the metadata that may be attached to the converted document.
64
- Check [Document](docs/Document.json) for the full JSON schema.
62
+ - **DoclingDocument** for publications like books, articles, reports, or patents. When Docling converts an unstructured PDF document, the generated JSON follows this schema.
63
+ The DoclingDocument type also models the metadata that may be attached to the converted document.
64
+ Check [DoclingDocument](docs/DoclingDocument.json) for the full JSON schema.
65
65
  - **Record** for structured database records, centered on an entity or _subject_ that is provided with a list of attributes.
66
66
  Related to records, the statements can represent annotations on text by Natural Language Processing (NLP) tools.
67
- Check [Record](docs/Record.json) for the full JSON schema.
67
+ Check [Record](docs/Record.json) for the full JSON schema.
68
68
  - **Generic** for any data representation, ensuring minimal configuration and maximum flexibility.
69
- Check [Generic](docs/Generic.json) for the full JSON schema.
69
+ Check [Generic](docs/Generic.json) for the full JSON schema.
70
70
 
71
71
  The data schemas are defined using [pydantic](https://pydantic-docs.helpmanual.io/) models, which provide built-in processes to support the creation of data that adhere to those models.
72
72
 
@@ -0,0 +1,10 @@
1
+ #
2
+ # Copyright IBM Corp. 2024 - 2024
3
+ # SPDX-License-Identifier: MIT
4
+ #
5
+
6
+ """Define the main types."""
7
+
8
+ from docling_core.types.doc.document import DoclingDocument
9
+ from docling_core.types.gen.generic import Generic
10
+ from docling_core.types.rec.record import Record
@@ -214,6 +214,7 @@ class DocumentOrigin(BaseModel):
214
214
  "application/vnd.openxmlformats-officedocument.presentationml.template",
215
215
  "application/vnd.openxmlformats-officedocument.presentationml.slideshow",
216
216
  "application/vnd.openxmlformats-officedocument.presentationml.presentation",
217
+ "text/asciidoc",
217
218
  ]
218
219
 
219
220
  @field_validator("binary_hash", mode="before")
@@ -18,7 +18,7 @@ from typing import Final
18
18
 
19
19
  from docling_core.utils.generate_jsonschema import generate_json_schema
20
20
 
21
- MODELS: Final = ["Document", "Record", "Generic"]
21
+ MODELS: Final = ["DoclingDocument", "Record", "Generic"]
22
22
 
23
23
 
24
24
  def _prepare_directory(folder: str, clean: bool = False) -> None:
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "docling-core"
3
- version = "2.0.0"
3
+ version = "2.0.1"
4
4
  description = "A python library to define and validate data types in Docling."
5
5
  license = "MIT"
6
6
  authors = [
@@ -1,29 +0,0 @@
1
- #
2
- # Copyright IBM Corp. 2024 - 2024
3
- # SPDX-License-Identifier: MIT
4
- #
5
-
6
- """Define the main types."""
7
-
8
- from docling_core.types.gen.generic import Generic # noqa
9
- from docling_core.types.legacy_doc.base import BoundingBox # noqa
10
- from docling_core.types.legacy_doc.base import Table # noqa
11
- from docling_core.types.legacy_doc.base import TableCell # noqa
12
- from docling_core.types.legacy_doc.base import ( # noqa
13
- BaseCell,
14
- BaseText,
15
- PageDimensions,
16
- PageReference,
17
- Prov,
18
- Ref,
19
- )
20
- from docling_core.types.legacy_doc.document import ( # noqa
21
- CCSDocumentDescription as DocumentDescription,
22
- )
23
- from docling_core.types.legacy_doc.document import ( # noqa
24
- CCSFileInfoObject as FileInfoObject,
25
- )
26
- from docling_core.types.legacy_doc.document import ( # noqa
27
- ExportedCCSDocument as Document,
28
- )
29
- from docling_core.types.rec.record import Record # noqa
File without changes