docling_core-0.0.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

Files changed (46)
  1. docling_core/__init__.py +6 -0
  2. docling_core/py.typed +0 -0
  3. docling_core/resources/schemas/doc/ANN.json +171 -0
  4. docling_core/resources/schemas/doc/DOC.json +300 -0
  5. docling_core/resources/schemas/doc/OCR-output.json +166 -0
  6. docling_core/resources/schemas/doc/RAW.json +158 -0
  7. docling_core/resources/schemas/generated/ccs_document_schema.json +1071 -0
  8. docling_core/resources/schemas/generated/minimal_document_schema_flat.json +1129 -0
  9. docling_core/resources/schemas/search/search_doc_mapping.json +104 -0
  10. docling_core/resources/schemas/search/search_doc_mapping_v2.json +256 -0
  11. docling_core/search/__init__.py +6 -0
  12. docling_core/search/json_schema_to_search_mapper.py +406 -0
  13. docling_core/search/mapping.py +29 -0
  14. docling_core/search/meta.py +93 -0
  15. docling_core/search/package.py +56 -0
  16. docling_core/types/__init__.py +25 -0
  17. docling_core/types/base.py +248 -0
  18. docling_core/types/doc/__init__.py +6 -0
  19. docling_core/types/doc/base.py +199 -0
  20. docling_core/types/doc/doc_ann.py +76 -0
  21. docling_core/types/doc/doc_ocr.py +83 -0
  22. docling_core/types/doc/doc_raw.py +187 -0
  23. docling_core/types/doc/document.py +393 -0
  24. docling_core/types/gen/__init__.py +6 -0
  25. docling_core/types/gen/generic.py +33 -0
  26. docling_core/types/nlp/__init__.py +6 -0
  27. docling_core/types/nlp/qa.py +74 -0
  28. docling_core/types/nlp/qa_labels.py +118 -0
  29. docling_core/types/rec/__init__.py +6 -0
  30. docling_core/types/rec/attribute.py +55 -0
  31. docling_core/types/rec/base.py +90 -0
  32. docling_core/types/rec/predicate.py +133 -0
  33. docling_core/types/rec/record.py +95 -0
  34. docling_core/types/rec/statement.py +41 -0
  35. docling_core/types/rec/subject.py +77 -0
  36. docling_core/utils/__init__.py +6 -0
  37. docling_core/utils/alias.py +27 -0
  38. docling_core/utils/ds_generate_docs.py +144 -0
  39. docling_core/utils/ds_generate_jsonschema.py +62 -0
  40. docling_core/utils/validate.py +86 -0
  41. docling_core/utils/validators.py +100 -0
  42. docling_core-0.0.1.dist-info/LICENSE +21 -0
  43. docling_core-0.0.1.dist-info/METADATA +133 -0
  44. docling_core-0.0.1.dist-info/RECORD +46 -0
  45. docling_core-0.0.1.dist-info/WHEEL +4 -0
  46. docling_core-0.0.1.dist-info/entry_points.txt +5 -0
@@ -0,0 +1,1071 @@
1
+ {
2
+ "title": "ExportedCCSDocument",
3
+ "type": "object",
4
+ "properties": {
5
+ "name": {
6
+ "title": "Name",
7
+ "type": "string"
8
+ },
9
+ "type": {
10
+ "title": "Type",
11
+ "default": "pdf-document",
12
+ "x-es": {
13
+ "type": "keyword",
14
+ "ignore_above": 8191
15
+ },
16
+ "type": "string"
17
+ },
18
+ "description": {
19
+ "$ref": "#/definitions/CCSDocumentDescription"
20
+ },
21
+ "file-info": {
22
+ "$ref": "#/definitions/CCSFileInfoObject"
23
+ },
24
+ "main-text": {
25
+ "title": "Main-Text",
26
+ "type": "array",
27
+ "items": {
28
+ "anyOf": [
29
+ {
30
+ "$ref": "#/definitions/Ref"
31
+ },
32
+ {
33
+ "$ref": "#/definitions/RecursiveList"
34
+ },
35
+ {
36
+ "$ref": "#/definitions/BaseText"
37
+ }
38
+ ]
39
+ }
40
+ },
41
+ "figures": {
42
+ "title": "Figures",
43
+ "type": "array",
44
+ "items": {
45
+ "$ref": "#/definitions/BaseCell"
46
+ }
47
+ },
48
+ "tables": {
49
+ "title": "Tables",
50
+ "type": "array",
51
+ "items": {
52
+ "$ref": "#/definitions/Table"
53
+ }
54
+ },
55
+ "bitmaps": {
56
+ "title": "Bitmaps",
57
+ "type": "array",
58
+ "items": {
59
+ "$ref": "#/definitions/BitmapObject"
60
+ }
61
+ },
62
+ "equations": {
63
+ "title": "Equations",
64
+ "type": "array",
65
+ "items": {
66
+ "$ref": "#/definitions/BaseCell"
67
+ }
68
+ },
69
+ "footnotes": {
70
+ "title": "Footnotes",
71
+ "type": "array",
72
+ "items": {
73
+ "$ref": "#/definitions/BaseText"
74
+ }
75
+ },
76
+ "page-dimensions": {
77
+ "title": "Page-Dimensions",
78
+ "type": "array",
79
+ "items": {
80
+ "$ref": "#/definitions/PageDimensions"
81
+ }
82
+ },
83
+ "page-footers": {
84
+ "title": "Page-Footers",
85
+ "type": "array",
86
+ "items": {
87
+ "$ref": "#/definitions/BaseText"
88
+ }
89
+ },
90
+ "page-headers": {
91
+ "title": "Page-Headers",
92
+ "type": "array",
93
+ "items": {
94
+ "$ref": "#/definitions/BaseText"
95
+ }
96
+ },
97
+ "_s3_data": {
98
+ "$ref": "#/definitions/S3Data"
99
+ }
100
+ },
101
+ "required": [
102
+ "name",
103
+ "description",
104
+ "file-info",
105
+ "main-text"
106
+ ],
107
+ "definitions": {
108
+ "Affiliation": {
109
+ "title": "Affiliation",
110
+ "type": "object",
111
+ "properties": {
112
+ "name": {
113
+ "title": "Name",
114
+ "x-es": {
115
+ "fields": {
116
+ "lower": {
117
+ "normalizer": "lowercase_asciifolding",
118
+ "type": "keyword"
119
+ },
120
+ "keyword": {
121
+ "type": "keyword"
122
+ }
123
+ },
124
+ "type": "keyword"
125
+ },
126
+ "type": "string"
127
+ },
128
+ "id": {
129
+ "title": "Id",
130
+ "x-es": {
131
+ "type": "keyword",
132
+ "ignore_above": 8191
133
+ },
134
+ "type": "string"
135
+ },
136
+ "source": {
137
+ "title": "Source",
138
+ "x-es": {
139
+ "type": "keyword",
140
+ "ignore_above": 8191
141
+ },
142
+ "type": "string"
143
+ }
144
+ },
145
+ "required": [
146
+ "name",
147
+ "id",
148
+ "source"
149
+ ]
150
+ },
151
+ "Author": {
152
+ "title": "Author",
153
+ "type": "object",
154
+ "properties": {
155
+ "name": {
156
+ "title": "Name",
157
+ "x-es": {
158
+ "fields": {
159
+ "lower": {
160
+ "normalizer": "lowercase_asciifolding",
161
+ "type": "keyword"
162
+ },
163
+ "keyword": {
164
+ "type": "keyword"
165
+ }
166
+ },
167
+ "type": "keyword"
168
+ },
169
+ "type": "string"
170
+ },
171
+ "id": {
172
+ "title": "Id",
173
+ "x-es": {
174
+ "type": "keyword",
175
+ "ignore_above": 8191
176
+ },
177
+ "type": "string"
178
+ },
179
+ "source": {
180
+ "title": "Source",
181
+ "x-es": {
182
+ "type": "keyword",
183
+ "ignore_above": 8191
184
+ },
185
+ "type": "string"
186
+ },
187
+ "affiliations": {
188
+ "title": "Affiliations",
189
+ "type": "array",
190
+ "items": {
191
+ "$ref": "#/definitions/Affiliation"
192
+ }
193
+ }
194
+ },
195
+ "required": [
196
+ "name",
197
+ "id",
198
+ "source",
199
+ "affiliations"
200
+ ]
201
+ },
202
+ "Identifier": {
203
+ "title": "Identifier",
204
+ "type": "object",
205
+ "properties": {
206
+ "type": {
207
+ "title": "Type",
208
+ "x-es": {
209
+ "type": "keyword",
210
+ "ignore_above": 8191
211
+ },
212
+ "type": "string"
213
+ },
214
+ "value": {
215
+ "title": "Value",
216
+ "x-es": {
217
+ "type": "keyword",
218
+ "ignore_above": 8191
219
+ },
220
+ "type": "string"
221
+ },
222
+ "_name": {
223
+ "title": " Name",
224
+ "x-es": {
225
+ "type": "keyword",
226
+ "ignore_above": 8191
227
+ },
228
+ "type": "string"
229
+ }
230
+ },
231
+ "required": [
232
+ "type",
233
+ "value",
234
+ "_name"
235
+ ]
236
+ },
237
+ "Log": {
238
+ "title": "Log",
239
+ "type": "object",
240
+ "properties": {
241
+ "agent": {
242
+ "title": "Agent",
243
+ "x-es": {
244
+ "type": "keyword",
245
+ "ignore_above": 8191
246
+ },
247
+ "type": "string"
248
+ },
249
+ "type": {
250
+ "title": "Type",
251
+ "x-es": {
252
+ "type": "keyword",
253
+ "ignore_above": 8191
254
+ },
255
+ "type": "string"
256
+ },
257
+ "comment": {
258
+ "title": "Comment",
259
+ "type": "string"
260
+ },
261
+ "date": {
262
+ "title": "Date",
263
+ "x-es": {
264
+ "type": "date"
265
+ },
266
+ "type": "string",
267
+ "format": "date-time"
268
+ }
269
+ },
270
+ "required": [
271
+ "agent",
272
+ "type",
273
+ "comment",
274
+ "date"
275
+ ]
276
+ },
277
+ "CCSDocumentDescription": {
278
+ "title": "CCSDocumentDescription",
279
+ "type": "object",
280
+ "properties": {
281
+ "title": {
282
+ "title": "Title",
283
+ "type": "string"
284
+ },
285
+ "abstract": {
286
+ "title": "Abstract",
287
+ "type": "array",
288
+ "items": {
289
+ "type": "string"
290
+ }
291
+ },
292
+ "authors": {
293
+ "title": "Authors",
294
+ "type": "array",
295
+ "items": {
296
+ "$ref": "#/definitions/Author"
297
+ }
298
+ },
299
+ "affiliations": {
300
+ "title": "Affiliations",
301
+ "type": "array",
302
+ "items": {
303
+ "$ref": "#/definitions/Affiliation"
304
+ }
305
+ },
306
+ "subjects": {
307
+ "title": "Subjects",
308
+ "x-es": {
309
+ "fields": {
310
+ "keyword": {
311
+ "ignore_above": 8191,
312
+ "type": "keyword"
313
+ }
314
+ }
315
+ },
316
+ "type": "array",
317
+ "items": {
318
+ "type": "string"
319
+ }
320
+ },
321
+ "keywords": {
322
+ "title": "Keywords",
323
+ "x-es": {
324
+ "type": "keyword",
325
+ "ignore_above": 8191
326
+ },
327
+ "type": "array",
328
+ "items": {
329
+ "type": "string"
330
+ }
331
+ },
332
+ "publication_date": {
333
+ "title": "Publication Date",
334
+ "x-es": {
335
+ "type": "date"
336
+ },
337
+ "type": "string",
338
+ "format": "date-time"
339
+ },
340
+ "languages": {
341
+ "title": "Languages",
342
+ "x-es": {
343
+ "type": "keyword",
344
+ "ignore_above": 8191
345
+ },
346
+ "type": "array",
347
+ "items": {
348
+ "type": "string"
349
+ }
350
+ },
351
+ "publishers": {
352
+ "title": "Publishers",
353
+ "x-es": {
354
+ "type": "keyword",
355
+ "ignore_above": 8191
356
+ },
357
+ "type": "array",
358
+ "items": {
359
+ "type": "string"
360
+ }
361
+ },
362
+ "url_refs": {
363
+ "title": "Url Refs",
364
+ "x-es": {
365
+ "type": "keyword",
366
+ "ignore_above": 8191
367
+ },
368
+ "type": "array",
369
+ "items": {
370
+ "type": "string"
371
+ }
372
+ },
373
+ "references": {
374
+ "title": "References",
375
+ "type": "array",
376
+ "items": {
377
+ "$ref": "#/definitions/Identifier"
378
+ }
379
+ },
380
+ "advanced": {
381
+ "title": "Advanced",
382
+ "additionalProperties": {},
383
+ "properties": {},
384
+ "type": "object"
385
+ },
386
+ "analytics": {
387
+ "title": "Analytics",
388
+ "additionalProperties": {},
389
+ "properties": {},
390
+ "type": "object"
391
+ },
392
+ "logs": {
393
+ "title": "Logs",
394
+ "type": "array",
395
+ "items": {
396
+ "$ref": "#/definitions/Log"
397
+ }
398
+ }
399
+ },
400
+ "required": [
401
+ "title",
402
+ "abstract",
403
+ "authors",
404
+ "affiliations",
405
+ "subjects",
406
+ "keywords",
407
+ "publication_date",
408
+ "languages",
409
+ "publishers",
410
+ "url_refs",
411
+ "references",
412
+ "advanced",
413
+ "analytics",
414
+ "logs"
415
+ ]
416
+ },
417
+ "CCSFileInfoDescription": {
418
+ "title": "CCSFileInfoDescription",
419
+ "type": "object",
420
+ "properties": {
421
+ "author": {
422
+ "title": "Author",
423
+ "type": "array",
424
+ "items": {
425
+ "type": "string"
426
+ }
427
+ },
428
+ "keywords": {
429
+ "title": "Keywords",
430
+ "type": "string"
431
+ },
432
+ "subject": {
433
+ "title": "Subject",
434
+ "type": "string"
435
+ },
436
+ "title": {
437
+ "title": "Title",
438
+ "type": "string"
439
+ }
440
+ }
441
+ },
442
+ "PageReference": {
443
+ "title": "PageReference",
444
+ "type": "object",
445
+ "properties": {
446
+ "hash": {
447
+ "title": "Hash",
448
+ "type": "string"
449
+ },
450
+ "model": {
451
+ "title": "Model",
452
+ "type": "string"
453
+ },
454
+ "page": {
455
+ "title": "Page",
456
+ "type": "integer"
457
+ }
458
+ },
459
+ "required": [
460
+ "hash",
461
+ "model",
462
+ "page"
463
+ ]
464
+ },
465
+ "CCSFileInfoObject": {
466
+ "title": "CCSFileInfoObject",
467
+ "type": "object",
468
+ "properties": {
469
+ "filename": {
470
+ "title": "Filename",
471
+ "x-es": {
472
+ "type": "keyword",
473
+ "ignore_above": 8191
474
+ },
475
+ "type": "string"
476
+ },
477
+ "#-pages": {
478
+ "title": "#-Pages",
479
+ "type": "integer"
480
+ },
481
+ "document-hash": {
482
+ "title": "Document-Hash",
483
+ "type": "string"
484
+ },
485
+ "collection-name": {
486
+ "title": "Collection-Name",
487
+ "type": "string"
488
+ },
489
+ "description": {
490
+ "$ref": "#/definitions/CCSFileInfoDescription"
491
+ },
492
+ "page-hashes": {
493
+ "title": "Page-Hashes",
494
+ "type": "array",
495
+ "items": {
496
+ "$ref": "#/definitions/PageReference"
497
+ }
498
+ },
499
+ "filename-prov": {
500
+ "title": "Filename-Prov",
501
+ "x-es": {
502
+ "type": "keyword",
503
+ "ignore_above": 8191
504
+ },
505
+ "type": "string"
506
+ }
507
+ },
508
+ "required": [
509
+ "filename",
510
+ "#-pages",
511
+ "document-hash",
512
+ "collection-name",
513
+ "description",
514
+ "page-hashes",
515
+ "filename-prov"
516
+ ]
517
+ },
518
+ "Ref": {
519
+ "title": "Ref",
520
+ "type": "object",
521
+ "properties": {
522
+ "name": {
523
+ "title": "Name",
524
+ "type": "string"
525
+ },
526
+ "type": {
527
+ "title": "Type",
528
+ "type": "string"
529
+ },
530
+ "__ref": {
531
+ "title": " Ref",
532
+ "type": "string"
533
+ }
534
+ },
535
+ "required": [
536
+ "name",
537
+ "type",
538
+ "__ref"
539
+ ]
540
+ },
541
+ "Prov": {
542
+ "title": "Prov",
543
+ "type": "object",
544
+ "properties": {
545
+ "bbox": {
546
+ "title": "Bbox",
547
+ "type": "array",
548
+ "items": [
549
+ {
550
+ "type": "number"
551
+ },
552
+ {
553
+ "type": "number"
554
+ },
555
+ {
556
+ "type": "number"
557
+ },
558
+ {
559
+ "type": "number"
560
+ }
561
+ ]
562
+ },
563
+ "page": {
564
+ "title": "Page",
565
+ "type": "integer"
566
+ },
567
+ "span": {
568
+ "title": "Span",
569
+ "type": "array",
570
+ "items": [
571
+ {
572
+ "type": "integer"
573
+ },
574
+ {
575
+ "type": "integer"
576
+ }
577
+ ]
578
+ }
579
+ },
580
+ "required": [
581
+ "bbox",
582
+ "page",
583
+ "span"
584
+ ]
585
+ },
586
+ "ListItem": {
587
+ "title": "ListItem",
588
+ "type": "object",
589
+ "properties": {
590
+ "text": {
591
+ "title": "Text",
592
+ "type": "string"
593
+ },
594
+ "type": {
595
+ "title": "Type",
596
+ "x-es": {
597
+ "type": "keyword",
598
+ "ignore_above": 8191
599
+ },
600
+ "type": "string"
601
+ },
602
+ "name": {
603
+ "title": "Name",
604
+ "x-es": {
605
+ "type": "keyword",
606
+ "ignore_above": 8191
607
+ },
608
+ "type": "string"
609
+ },
610
+ "font": {
611
+ "title": "Font",
612
+ "type": "string"
613
+ },
614
+ "prov": {
615
+ "title": "Prov",
616
+ "type": "array",
617
+ "items": {
618
+ "$ref": "#/definitions/Prov"
619
+ }
620
+ },
621
+ "identifier": {
622
+ "title": "Identifier",
623
+ "type": "string"
624
+ }
625
+ },
626
+ "required": [
627
+ "text",
628
+ "type",
629
+ "identifier"
630
+ ]
631
+ },
632
+ "RecursiveList": {
633
+ "title": "RecursiveList",
634
+ "type": "object",
635
+ "properties": {
636
+ "data": {
637
+ "title": "Data",
638
+ "type": "array",
639
+ "items": {
640
+ "anyOf": [
641
+ {
642
+ "$ref": "#/definitions/ListItem"
643
+ },
644
+ {
645
+ "$ref": "#/definitions/RecursiveList"
646
+ }
647
+ ]
648
+ }
649
+ },
650
+ "name": {
651
+ "title": "Name",
652
+ "type": "string"
653
+ },
654
+ "prov": {
655
+ "title": "Prov",
656
+ "type": "array",
657
+ "items": {
658
+ "$ref": "#/definitions/Prov"
659
+ }
660
+ },
661
+ "type": {
662
+ "title": "Type",
663
+ "type": "string"
664
+ }
665
+ },
666
+ "required": [
667
+ "data",
668
+ "type"
669
+ ]
670
+ },
671
+ "BaseText": {
672
+ "title": "BaseText",
673
+ "type": "object",
674
+ "properties": {
675
+ "text": {
676
+ "title": "Text",
677
+ "type": "string"
678
+ },
679
+ "type": {
680
+ "title": "Type",
681
+ "x-es": {
682
+ "type": "keyword",
683
+ "ignore_above": 8191
684
+ },
685
+ "type": "string"
686
+ },
687
+ "name": {
688
+ "title": "Name",
689
+ "x-es": {
690
+ "type": "keyword",
691
+ "ignore_above": 8191
692
+ },
693
+ "type": "string"
694
+ },
695
+ "font": {
696
+ "title": "Font",
697
+ "type": "string"
698
+ },
699
+ "prov": {
700
+ "title": "Prov",
701
+ "type": "array",
702
+ "items": {
703
+ "$ref": "#/definitions/Prov"
704
+ }
705
+ }
706
+ },
707
+ "required": [
708
+ "text",
709
+ "type"
710
+ ]
711
+ },
712
+ "BoundingBoxContainer": {
713
+ "title": "BoundingBoxContainer",
714
+ "type": "object",
715
+ "properties": {
716
+ "min": {
717
+ "title": "Min",
718
+ "type": "array",
719
+ "items": [
720
+ {
721
+ "type": "number"
722
+ },
723
+ {
724
+ "type": "number"
725
+ },
726
+ {
727
+ "type": "number"
728
+ },
729
+ {
730
+ "type": "number"
731
+ }
732
+ ]
733
+ },
734
+ "max": {
735
+ "title": "Max",
736
+ "type": "array",
737
+ "items": [
738
+ {
739
+ "type": "number"
740
+ },
741
+ {
742
+ "type": "number"
743
+ },
744
+ {
745
+ "type": "number"
746
+ },
747
+ {
748
+ "type": "number"
749
+ }
750
+ ]
751
+ }
752
+ },
753
+ "required": [
754
+ "min",
755
+ "max"
756
+ ]
757
+ },
758
+ "CellsContainer": {
759
+ "title": "CellsContainer",
760
+ "type": "object",
761
+ "properties": {
762
+ "data": {
763
+ "title": "Data",
764
+ "type": "array",
765
+ "items": {
766
+ "type": "array",
767
+ "items": [
768
+ {
769
+ "type": "number"
770
+ },
771
+ {
772
+ "type": "number"
773
+ },
774
+ {
775
+ "type": "number"
776
+ },
777
+ {
778
+ "type": "number"
779
+ },
780
+ {
781
+ "type": "string"
782
+ },
783
+ {
784
+ "type": "string"
785
+ }
786
+ ]
787
+ }
788
+ },
789
+ "header": {
790
+ "title": "Header",
791
+ "default": [
792
+ "x0",
793
+ "y0",
794
+ "x1",
795
+ "y1",
796
+ "font",
797
+ "text"
798
+ ],
799
+ "type": "array",
800
+ "items": [
801
+ {
802
+ "enum": [
803
+ "x0"
804
+ ],
805
+ "type": "string"
806
+ },
807
+ {
808
+ "enum": [
809
+ "y0"
810
+ ],
811
+ "type": "string"
812
+ },
813
+ {
814
+ "enum": [
815
+ "x1"
816
+ ],
817
+ "type": "string"
818
+ },
819
+ {
820
+ "enum": [
821
+ "y1"
822
+ ],
823
+ "type": "string"
824
+ },
825
+ {
826
+ "enum": [
827
+ "font"
828
+ ],
829
+ "type": "string"
830
+ },
831
+ {
832
+ "enum": [
833
+ "text"
834
+ ],
835
+ "type": "string"
836
+ }
837
+ ]
838
+ }
839
+ },
840
+ "required": [
841
+ "data"
842
+ ]
843
+ },
844
+ "BaseCell": {
845
+ "title": "BaseCell",
846
+ "type": "object",
847
+ "properties": {
848
+ "bounding-box": {
849
+ "$ref": "#/definitions/BoundingBoxContainer"
850
+ },
851
+ "cells": {
852
+ "$ref": "#/definitions/CellsContainer"
853
+ },
854
+ "prov": {
855
+ "title": "Prov",
856
+ "type": "array",
857
+ "items": {
858
+ "$ref": "#/definitions/Prov"
859
+ }
860
+ },
861
+ "text": {
862
+ "title": "Text",
863
+ "type": "string"
864
+ },
865
+ "type": {
866
+ "title": "Type",
867
+ "x-es": {
868
+ "type": "keyword",
869
+ "ignore_above": 8191
870
+ },
871
+ "type": "string"
872
+ }
873
+ },
874
+ "required": [
875
+ "bounding-box",
876
+ "cells",
877
+ "text",
878
+ "type"
879
+ ]
880
+ },
881
+ "TableCell": {
882
+ "title": "TableCell",
883
+ "type": "object",
884
+ "properties": {
885
+ "bbox": {
886
+ "title": "Bbox",
887
+ "type": "array",
888
+ "items": [
889
+ {
890
+ "type": "number"
891
+ },
892
+ {
893
+ "type": "number"
894
+ },
895
+ {
896
+ "type": "number"
897
+ },
898
+ {
899
+ "type": "number"
900
+ }
901
+ ]
902
+ },
903
+ "spans": {
904
+ "title": "Spans",
905
+ "type": "array",
906
+ "items": {
907
+ "type": "array",
908
+ "items": [
909
+ {
910
+ "type": "integer"
911
+ },
912
+ {
913
+ "type": "integer"
914
+ }
915
+ ]
916
+ }
917
+ },
918
+ "text": {
919
+ "title": "Text",
920
+ "type": "string"
921
+ },
922
+ "type": {
923
+ "title": "Type",
924
+ "type": "string"
925
+ }
926
+ },
927
+ "required": [
928
+ "text",
929
+ "type"
930
+ ]
931
+ },
932
+ "Table": {
933
+ "title": "Table",
934
+ "type": "object",
935
+ "properties": {
936
+ "num_cols": {
937
+ "title": "Num Cols",
938
+ "type": "integer"
939
+ },
940
+ "num_rows": {
941
+ "title": "Num Rows",
942
+ "type": "integer"
943
+ },
944
+ "bounding_box": {
945
+ "$ref": "#/definitions/BoundingBoxContainer"
946
+ },
947
+ "cells": {
948
+ "$ref": "#/definitions/CellsContainer"
949
+ },
950
+ "data": {
951
+ "title": "Data",
952
+ "type": "array",
953
+ "items": {
954
+ "type": "array",
955
+ "items": {
956
+ "$ref": "#/definitions/TableCell"
957
+ }
958
+ }
959
+ },
960
+ "model": {
961
+ "title": "Model",
962
+ "type": "string"
963
+ },
964
+ "prov": {
965
+ "$ref": "#/definitions/Prov"
966
+ },
967
+ "text": {
968
+ "title": "Text",
969
+ "type": "string"
970
+ },
971
+ "type": {
972
+ "title": "Type",
973
+ "type": "string"
974
+ }
975
+ },
976
+ "required": [
977
+ "num_cols",
978
+ "num_rows",
979
+ "data",
980
+ "text",
981
+ "type"
982
+ ]
983
+ },
984
+ "BitmapObject": {
985
+ "title": "BitmapObject",
986
+ "type": "object",
987
+ "properties": {
988
+ "type": {
989
+ "title": "Type",
990
+ "type": "string"
991
+ },
992
+ "bounding_box": {
993
+ "$ref": "#/definitions/BoundingBoxContainer"
994
+ },
995
+ "prov": {
996
+ "$ref": "#/definitions/Prov"
997
+ }
998
+ },
999
+ "required": [
1000
+ "type",
1001
+ "bounding_box",
1002
+ "prov"
1003
+ ]
1004
+ },
1005
+ "PageDimensions": {
1006
+ "title": "PageDimensions",
1007
+ "type": "object",
1008
+ "properties": {
1009
+ "height": {
1010
+ "title": "Height",
1011
+ "type": "number"
1012
+ },
1013
+ "page": {
1014
+ "title": "Page",
1015
+ "type": "integer"
1016
+ },
1017
+ "width": {
1018
+ "title": "Width",
1019
+ "type": "number"
1020
+ }
1021
+ },
1022
+ "required": [
1023
+ "height",
1024
+ "page",
1025
+ "width"
1026
+ ]
1027
+ },
1028
+ "S3Resource": {
1029
+ "title": "S3Resource",
1030
+ "type": "object",
1031
+ "properties": {
1032
+ "mime": {
1033
+ "title": "Mime",
1034
+ "type": "string"
1035
+ },
1036
+ "path": {
1037
+ "title": "Path",
1038
+ "type": "string"
1039
+ },
1040
+ "page": {
1041
+ "title": "Page",
1042
+ "type": "integer"
1043
+ }
1044
+ },
1045
+ "required": [
1046
+ "mime",
1047
+ "path"
1048
+ ]
1049
+ },
1050
+ "S3Data": {
1051
+ "title": "S3Data",
1052
+ "type": "object",
1053
+ "properties": {
1054
+ "pdf_document": {
1055
+ "title": "Pdf Document",
1056
+ "type": "array",
1057
+ "items": {
1058
+ "$ref": "#/definitions/S3Resource"
1059
+ }
1060
+ },
1061
+ "pdf_pages": {
1062
+ "title": "Pdf Pages",
1063
+ "type": "array",
1064
+ "items": {
1065
+ "$ref": "#/definitions/S3Resource"
1066
+ }
1067
+ }
1068
+ }
1069
+ }
1070
+ }
1071
+ }