docling-core 0.0.1__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

Files changed (46) hide show
  1. docling_core/__init__.py +6 -0
  2. docling_core/py.typed +0 -0
  3. docling_core/resources/schemas/doc/ANN.json +171 -0
  4. docling_core/resources/schemas/doc/DOC.json +300 -0
  5. docling_core/resources/schemas/doc/OCR-output.json +166 -0
  6. docling_core/resources/schemas/doc/RAW.json +158 -0
  7. docling_core/resources/schemas/generated/ccs_document_schema.json +1071 -0
  8. docling_core/resources/schemas/generated/minimal_document_schema_flat.json +1129 -0
  9. docling_core/resources/schemas/search/search_doc_mapping.json +104 -0
  10. docling_core/resources/schemas/search/search_doc_mapping_v2.json +256 -0
  11. docling_core/search/__init__.py +6 -0
  12. docling_core/search/json_schema_to_search_mapper.py +406 -0
  13. docling_core/search/mapping.py +29 -0
  14. docling_core/search/meta.py +93 -0
  15. docling_core/search/package.py +56 -0
  16. docling_core/types/__init__.py +25 -0
  17. docling_core/types/base.py +248 -0
  18. docling_core/types/doc/__init__.py +6 -0
  19. docling_core/types/doc/base.py +199 -0
  20. docling_core/types/doc/doc_ann.py +76 -0
  21. docling_core/types/doc/doc_ocr.py +83 -0
  22. docling_core/types/doc/doc_raw.py +187 -0
  23. docling_core/types/doc/document.py +393 -0
  24. docling_core/types/gen/__init__.py +6 -0
  25. docling_core/types/gen/generic.py +33 -0
  26. docling_core/types/nlp/__init__.py +6 -0
  27. docling_core/types/nlp/qa.py +74 -0
  28. docling_core/types/nlp/qa_labels.py +118 -0
  29. docling_core/types/rec/__init__.py +6 -0
  30. docling_core/types/rec/attribute.py +55 -0
  31. docling_core/types/rec/base.py +90 -0
  32. docling_core/types/rec/predicate.py +133 -0
  33. docling_core/types/rec/record.py +95 -0
  34. docling_core/types/rec/statement.py +41 -0
  35. docling_core/types/rec/subject.py +77 -0
  36. docling_core/utils/__init__.py +6 -0
  37. docling_core/utils/alias.py +27 -0
  38. docling_core/utils/ds_generate_docs.py +144 -0
  39. docling_core/utils/ds_generate_jsonschema.py +62 -0
  40. docling_core/utils/validate.py +86 -0
  41. docling_core/utils/validators.py +100 -0
  42. docling_core-0.0.1.dist-info/LICENSE +21 -0
  43. docling_core-0.0.1.dist-info/METADATA +133 -0
  44. docling_core-0.0.1.dist-info/RECORD +46 -0
  45. docling_core-0.0.1.dist-info/WHEEL +4 -0
  46. docling_core-0.0.1.dist-info/entry_points.txt +5 -0
@@ -0,0 +1,104 @@
1
+ {
2
+ "mappings": {
3
+ "dynamic": false,
4
+ "_size": {
5
+ "enabled": true
6
+ },
7
+ "_meta": {
8
+ "$ref": "ccs:schemas#/Document"
9
+ },
10
+ "properties": {
11
+ "description": {
12
+ "type": "object",
13
+ "properties": {
14
+ "abstract": {
15
+ "type": "text"
16
+ },
17
+ "affiliations": {
18
+ "type": "keyword"
19
+ },
20
+ "authors": {
21
+ "type": "keyword"
22
+ },
23
+ "title": {
24
+ "type": "text"
25
+ }
26
+ }
27
+ },
28
+ "figures": {
29
+ "type": "object",
30
+ "properties": {
31
+ "text": {
32
+ "type": "text"
33
+ },
34
+ "type": {
35
+ "type": "keyword"
36
+ },
37
+ "prov": {
38
+ "type": "object",
39
+ "properties": {
40
+ "page": {
41
+ "type": "integer"
42
+ }
43
+ }
44
+ }
45
+ }
46
+ },
47
+ "file-info": {
48
+ "type": "object",
49
+ "properties": {
50
+ "filename": {
51
+ "type": "text"
52
+ }
53
+ }
54
+ },
55
+ "main-text": {
56
+ "type": "object",
57
+ "properties": {
58
+ "text": {
59
+ "type": "text"
60
+ },
61
+ "type": {
62
+ "type": "keyword"
63
+ },
64
+ "name": {
65
+ "type": "keyword"
66
+ },
67
+ "prov": {
68
+ "type": "object",
69
+ "properties": {
70
+ "page": {
71
+ "type": "integer"
72
+ }
73
+ }
74
+ }
75
+ }
76
+ },
77
+ "_name": {
78
+ "type": "keyword"
79
+ },
80
+ "tables": {
81
+ "type": "object",
82
+ "properties": {
83
+ "text": {
84
+ "type": "text"
85
+ },
86
+ "type": {
87
+ "type": "keyword"
88
+ },
89
+ "prov": {
90
+ "type": "object",
91
+ "properties": {
92
+ "page": {
93
+ "type": "integer"
94
+ }
95
+ }
96
+ }
97
+ }
98
+ },
99
+ "type": {
100
+ "type": "keyword"
101
+ }
102
+ }
103
+ }
104
+ }
@@ -0,0 +1,256 @@
1
+ {
2
+ "settings": {
3
+ "analysis": {
4
+ "normalizer": {
5
+ "lowercase_asciifolding": {
6
+ "type": "custom",
7
+ "filter": [
8
+ "lowercase",
9
+ "asciifolding"
10
+ ]
11
+ }
12
+ }
13
+ }
14
+ },
15
+ "mappings": {
16
+ "dynamic": false,
17
+ "_size": {
18
+ "enabled": true
19
+ },
20
+ "_meta": {
21
+ "version": "1.0",
22
+ "$ref": "ccs:schemas#/Document"
23
+ },
24
+ "properties": {
25
+ "_name": {
26
+ "type": "text"
27
+ },
28
+ "identifiers": {
29
+ "properties": {
30
+ "_name": {
31
+ "ignore_above": 8191,
32
+ "type": "keyword"
33
+ },
34
+ "type": {
35
+ "ignore_above": 8191,
36
+ "type": "keyword"
37
+ },
38
+ "value": {
39
+ "ignore_above": 8191,
40
+ "type": "keyword"
41
+ }
42
+ }
43
+ },
44
+ "description": {
45
+ "properties": {
46
+ "abstract": {
47
+ "type": "text"
48
+ },
49
+ "affiliations": {
50
+ "properties": {
51
+ "name": {
52
+ "type": "text",
53
+ "fields": {
54
+ "lower": {
55
+ "normalizer": "lowercase_asciifolding",
56
+ "type": "keyword"
57
+ },
58
+ "keyword": {
59
+ "type": "keyword"
60
+ }
61
+ }
62
+ },
63
+ "id": {
64
+ "ignore_above": 8191,
65
+ "type": "keyword"
66
+ },
67
+ "source": {
68
+ "ignore_above": 8191,
69
+ "type": "keyword"
70
+ }
71
+ }
72
+ },
73
+ "authors": {
74
+ "properties": {
75
+ "name": {
76
+ "type": "text",
77
+ "fields": {
78
+ "lower": {
79
+ "normalizer": "lowercase_asciifolding",
80
+ "type": "keyword"
81
+ },
82
+ "keyword": {
83
+ "type": "keyword"
84
+ }
85
+ }
86
+ },
87
+ "affiliations": {
88
+ "properties": {
89
+ "name": {
90
+ "type": "text",
91
+ "fields": {
92
+ "lower": {
93
+ "normalizer": "lowercase_asciifolding",
94
+ "type": "keyword"
95
+ },
96
+ "keyword": {
97
+ "type": "keyword"
98
+ }
99
+ }
100
+ },
101
+ "id": {
102
+ "ignore_above": 8191,
103
+ "type": "keyword"
104
+ },
105
+ "source": {
106
+ "ignore_above": 8191,
107
+ "type": "keyword"
108
+ }
109
+ }
110
+ }
111
+ }
112
+ },
113
+ "title": {
114
+ "type": "text"
115
+ },
116
+ "subjects": {
117
+ "type": "text",
118
+ "fields": {
119
+ "keyword": {
120
+ "ignore_above": 8191,
121
+ "type": "keyword"
122
+ }
123
+ }
124
+ },
125
+ "publication_date": {
126
+ "type": "date"
127
+ },
128
+ "languages": {
129
+ "ignore_above": 8191,
130
+ "type": "keyword"
131
+ },
132
+ "publishers": {
133
+ "ignore_above": 8191,
134
+ "type": "keyword"
135
+ },
136
+ "url_refs": {
137
+ "ignore_above": 8191,
138
+ "type": "keyword"
139
+ },
140
+ "references": {
141
+ "properties": {
142
+ "_name": {
143
+ "ignore_above": 8191,
144
+ "type": "keyword"
145
+ },
146
+ "type": {
147
+ "ignore_above": 8191,
148
+ "type": "keyword"
149
+ },
150
+ "value": {
151
+ "ignore_above": 8191,
152
+ "type": "keyword"
153
+ }
154
+ }
155
+ },
156
+ "logs": {
157
+ "properties": {
158
+ "date": {
159
+ "type": "date"
160
+ },
161
+ "agent": {
162
+ "ignore_above": 8191,
163
+ "type": "keyword"
164
+ },
165
+ "comment": {
166
+ "type": "text"
167
+ },
168
+ "type": {
169
+ "ignore_above": 8191,
170
+ "type": "keyword"
171
+ }
172
+ }
173
+ }
174
+ }
175
+ },
176
+ "figures": {
177
+ "properties": {
178
+ "text": {
179
+ "type": "text"
180
+ },
181
+ "type": {
182
+ "ignore_above": 8191,
183
+ "type": "keyword"
184
+ },
185
+ "prov": {
186
+ "properties": {
187
+ "page": {
188
+ "type": "integer"
189
+ }
190
+ }
191
+ }
192
+ }
193
+ },
194
+ "file-info": {
195
+ "properties": {
196
+ "filename-prov": {
197
+ "ignore_above": 8191,
198
+ "type": "keyword"
199
+ },
200
+ "filename": {
201
+ "ignore_above": 8191,
202
+ "type": "keyword"
203
+ },
204
+ "document-hash": {
205
+ "ignore_above": 8191,
206
+ "type": "keyword"
207
+ }
208
+ }
209
+ },
210
+ "main-text": {
211
+ "properties": {
212
+ "text": {
213
+ "type": "text"
214
+ },
215
+ "type": {
216
+ "ignore_above": 8191,
217
+ "type": "keyword"
218
+ },
219
+ "name": {
220
+ "ignore_above": 8191,
221
+ "type": "keyword"
222
+ },
223
+ "prov": {
224
+ "properties": {
225
+ "page": {
226
+ "type": "integer"
227
+ }
228
+ }
229
+ }
230
+ }
231
+ },
232
+ "tables": {
233
+ "properties": {
234
+ "text": {
235
+ "type": "text"
236
+ },
237
+ "type": {
238
+ "ignore_above": 8191,
239
+ "type": "keyword"
240
+ },
241
+ "prov": {
242
+ "properties": {
243
+ "page": {
244
+ "type": "integer"
245
+ }
246
+ }
247
+ }
248
+ }
249
+ },
250
+ "type": {
251
+ "ignore_above": 8191,
252
+ "type": "keyword"
253
+ }
254
+ }
255
+ }
256
+ }
@@ -0,0 +1,6 @@
1
+ #
2
+ # Copyright IBM Corp. 2024 - 2024
3
+ # SPDX-License-Identifier: MIT
4
+ #
5
+
6
+ """Package for models and utility functions for search database mappings."""