docling-core 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

Files changed (46)
  1. docling_core/__init__.py +6 -0
  2. docling_core/py.typed +0 -0
  3. docling_core/resources/schemas/doc/ANN.json +171 -0
  4. docling_core/resources/schemas/doc/DOC.json +300 -0
  5. docling_core/resources/schemas/doc/OCR-output.json +166 -0
  6. docling_core/resources/schemas/doc/RAW.json +158 -0
  7. docling_core/resources/schemas/generated/ccs_document_schema.json +1071 -0
  8. docling_core/resources/schemas/generated/minimal_document_schema_flat.json +1129 -0
  9. docling_core/resources/schemas/search/search_doc_mapping.json +104 -0
  10. docling_core/resources/schemas/search/search_doc_mapping_v2.json +256 -0
  11. docling_core/search/__init__.py +6 -0
  12. docling_core/search/json_schema_to_search_mapper.py +406 -0
  13. docling_core/search/mapping.py +29 -0
  14. docling_core/search/meta.py +93 -0
  15. docling_core/search/package.py +56 -0
  16. docling_core/types/__init__.py +25 -0
  17. docling_core/types/base.py +248 -0
  18. docling_core/types/doc/__init__.py +6 -0
  19. docling_core/types/doc/base.py +199 -0
  20. docling_core/types/doc/doc_ann.py +76 -0
  21. docling_core/types/doc/doc_ocr.py +83 -0
  22. docling_core/types/doc/doc_raw.py +187 -0
  23. docling_core/types/doc/document.py +393 -0
  24. docling_core/types/gen/__init__.py +6 -0
  25. docling_core/types/gen/generic.py +33 -0
  26. docling_core/types/nlp/__init__.py +6 -0
  27. docling_core/types/nlp/qa.py +74 -0
  28. docling_core/types/nlp/qa_labels.py +118 -0
  29. docling_core/types/rec/__init__.py +6 -0
  30. docling_core/types/rec/attribute.py +55 -0
  31. docling_core/types/rec/base.py +90 -0
  32. docling_core/types/rec/predicate.py +133 -0
  33. docling_core/types/rec/record.py +95 -0
  34. docling_core/types/rec/statement.py +41 -0
  35. docling_core/types/rec/subject.py +77 -0
  36. docling_core/utils/__init__.py +6 -0
  37. docling_core/utils/alias.py +27 -0
  38. docling_core/utils/ds_generate_docs.py +144 -0
  39. docling_core/utils/ds_generate_jsonschema.py +62 -0
  40. docling_core/utils/validate.py +86 -0
  41. docling_core/utils/validators.py +100 -0
  42. docling_core-0.0.1.dist-info/LICENSE +21 -0
  43. docling_core-0.0.1.dist-info/METADATA +133 -0
  44. docling_core-0.0.1.dist-info/RECORD +46 -0
  45. docling_core-0.0.1.dist-info/WHEEL +4 -0
  46. docling_core-0.0.1.dist-info/entry_points.txt +5 -0
@@ -0,0 +1,158 @@
1
+ {
2
+ "$schema": "http://json-schema.org/schema#",
3
+ "type": "object",
4
+ "required": [
5
+ "info",
6
+ "pages"
7
+ ],
8
+ "properties": {
9
+ "info": {},
10
+ "pages": {
11
+ "type": "array",
12
+ "items": {
13
+ "type": "object",
14
+ "required": [
15
+ "height",
16
+ "width",
17
+ "dimensions",
18
+ "cells",
19
+ "paths",
20
+ "images",
21
+ "fonts"
22
+ ],
23
+ "properties": {
24
+ "height": {
25
+ "type": "number"
26
+ },
27
+ "width": {
28
+ "type": "number"
29
+ },
30
+ "dimensions": {
31
+ "type": "object"
32
+ },
33
+ "cells": {
34
+ "type": "array",
35
+ "items": {
36
+ "type": "object",
37
+ "required": [
38
+ "SEE_cell",
39
+ "SEE_confidence",
40
+ "angle",
41
+ "box",
42
+ "content",
43
+ "enumeration",
44
+ "font"
45
+ ],
46
+ "properties": {
47
+ "SEE_cell": {
48
+ "type": "boolean"
49
+ },
50
+ "SEE_confidence": {
51
+ "type": "number"
52
+ },
53
+ "angle": {
54
+ "type": "number"
55
+ },
56
+ "box": {
57
+ "type": "object",
58
+ "required": [
59
+ "baseline",
60
+ "device"
61
+ ],
62
+ "properties": {
63
+ "baseline": {
64
+ "type": "array",
65
+ "minItems": 4,
66
+ "maxItems": 4,
67
+ "items": {
68
+ "type": "number"
69
+ }
70
+ },
71
+ "device": {
72
+ "type": "array",
73
+ "minItems": 4,
74
+ "maxItems": 4,
75
+ "items": {
76
+ "type": "number"
77
+ }
78
+ }
79
+ }
80
+ },
81
+ "content": {
82
+ "type": "object",
83
+ "required": [
84
+ "rnormalized"
85
+ ],
86
+ "properties": {
87
+ "rnormalized": {
88
+ "type": "string"
89
+ }
90
+ }
91
+ },
92
+ "enumeration": {
93
+ "type": "object",
94
+ "required": [
95
+ "match",
96
+ "type"
97
+ ],
98
+ "properties": {
99
+ "match": {
100
+ "type": "integer"
101
+ },
102
+ "type": {
103
+ "type": "integer"
104
+ }
105
+ }
106
+ },
107
+ "font": {
108
+ "type": "object",
109
+ "required": [
110
+ "color",
111
+ "name",
112
+ "size"
113
+ ],
114
+ "properties": {
115
+ "color": {
116
+ "type": "array",
117
+ "minItems": 3,
118
+ "maxItems": 4,
119
+ "items": {
120
+ "type": "number"
121
+ }
122
+ },
123
+ "name": {
124
+ "type": "string"
125
+ },
126
+ "size": {
127
+ "type": "number"
128
+ }
129
+ }
130
+ }
131
+ }
132
+ }
133
+ },
134
+ "paths": {
135
+ "type": "array",
136
+ "items": {}
137
+ },
138
+ "vertical-lines": {
139
+ "type": "array",
140
+ "items": {}
141
+ },
142
+ "horizontal-lines": {
143
+ "type": "array",
144
+ "items": {}
145
+ },
146
+ "images": {
147
+ "type": "array",
148
+ "items": {}
149
+ },
150
+ "fonts": {
151
+ "type": "array",
152
+ "items": {}
153
+ }
154
+ }
155
+ }
156
+ }
157
+ }
158
+ }