docling-core 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of docling-core might be problematic. Click here for more details.

Files changed (46)
  1. docling_core/__init__.py +6 -0
  2. docling_core/py.typed +0 -0
  3. docling_core/resources/schemas/doc/ANN.json +171 -0
  4. docling_core/resources/schemas/doc/DOC.json +300 -0
  5. docling_core/resources/schemas/doc/OCR-output.json +166 -0
  6. docling_core/resources/schemas/doc/RAW.json +158 -0
  7. docling_core/resources/schemas/generated/ccs_document_schema.json +1071 -0
  8. docling_core/resources/schemas/generated/minimal_document_schema_flat.json +1129 -0
  9. docling_core/resources/schemas/search/search_doc_mapping.json +104 -0
  10. docling_core/resources/schemas/search/search_doc_mapping_v2.json +256 -0
  11. docling_core/search/__init__.py +6 -0
  12. docling_core/search/json_schema_to_search_mapper.py +406 -0
  13. docling_core/search/mapping.py +29 -0
  14. docling_core/search/meta.py +93 -0
  15. docling_core/search/package.py +56 -0
  16. docling_core/types/__init__.py +25 -0
  17. docling_core/types/base.py +248 -0
  18. docling_core/types/doc/__init__.py +6 -0
  19. docling_core/types/doc/base.py +199 -0
  20. docling_core/types/doc/doc_ann.py +76 -0
  21. docling_core/types/doc/doc_ocr.py +83 -0
  22. docling_core/types/doc/doc_raw.py +187 -0
  23. docling_core/types/doc/document.py +393 -0
  24. docling_core/types/gen/__init__.py +6 -0
  25. docling_core/types/gen/generic.py +33 -0
  26. docling_core/types/nlp/__init__.py +6 -0
  27. docling_core/types/nlp/qa.py +74 -0
  28. docling_core/types/nlp/qa_labels.py +118 -0
  29. docling_core/types/rec/__init__.py +6 -0
  30. docling_core/types/rec/attribute.py +55 -0
  31. docling_core/types/rec/base.py +90 -0
  32. docling_core/types/rec/predicate.py +133 -0
  33. docling_core/types/rec/record.py +95 -0
  34. docling_core/types/rec/statement.py +41 -0
  35. docling_core/types/rec/subject.py +77 -0
  36. docling_core/utils/__init__.py +6 -0
  37. docling_core/utils/alias.py +27 -0
  38. docling_core/utils/ds_generate_docs.py +144 -0
  39. docling_core/utils/ds_generate_jsonschema.py +62 -0
  40. docling_core/utils/validate.py +86 -0
  41. docling_core/utils/validators.py +100 -0
  42. docling_core-0.0.1.dist-info/LICENSE +21 -0
  43. docling_core-0.0.1.dist-info/METADATA +133 -0
  44. docling_core-0.0.1.dist-info/RECORD +46 -0
  45. docling_core-0.0.1.dist-info/WHEEL +4 -0
  46. docling_core-0.0.1.dist-info/entry_points.txt +5 -0
@@ -0,0 +1,1129 @@
1
+ {
2
+ "title": "MinimalDocument",
3
+ "type": "object",
4
+ "properties": {
5
+ "name": {
6
+ "title": "Name",
7
+ "type": "string"
8
+ },
9
+ "type": {
10
+ "title": "Type",
11
+ "default": "document",
12
+ "type": "string"
13
+ },
14
+ "description": {
15
+ "title": "CCSDocumentDescription",
16
+ "type": "object",
17
+ "properties": {
18
+ "title": {
19
+ "title": "Title",
20
+ "type": "string"
21
+ },
22
+ "abstract": {
23
+ "title": "Abstract",
24
+ "type": "array",
25
+ "items": {
26
+ "type": "string"
27
+ }
28
+ },
29
+ "authors": {
30
+ "title": "Authors",
31
+ "type": "array",
32
+ "items": {
33
+ "title": "Author",
34
+ "type": "object",
35
+ "properties": {
36
+ "name": {
37
+ "title": "Name",
38
+ "x-es": {
39
+ "fields": {
40
+ "lower": {
41
+ "normalizer": "lowercase_asciifolding",
42
+ "type": "keyword"
43
+ },
44
+ "keyword": {
45
+ "type": "keyword"
46
+ }
47
+ },
48
+ "type": "keyword"
49
+ },
50
+ "type": "string"
51
+ },
52
+ "id": {
53
+ "title": "Id",
54
+ "x-es": {
55
+ "type": "keyword",
56
+ "ignore_above": 8191
57
+ },
58
+ "type": "string"
59
+ },
60
+ "source": {
61
+ "title": "Source",
62
+ "x-es": {
63
+ "type": "keyword",
64
+ "ignore_above": 8191
65
+ },
66
+ "type": "string"
67
+ },
68
+ "affiliations": {
69
+ "title": "Affiliations",
70
+ "type": "array",
71
+ "items": {
72
+ "title": "Affiliation",
73
+ "type": "object",
74
+ "properties": {
75
+ "name": {
76
+ "title": "Name",
77
+ "x-es": {
78
+ "fields": {
79
+ "lower": {
80
+ "normalizer": "lowercase_asciifolding",
81
+ "type": "keyword"
82
+ },
83
+ "keyword": {
84
+ "type": "keyword"
85
+ }
86
+ },
87
+ "type": "keyword"
88
+ },
89
+ "type": "string"
90
+ },
91
+ "id": {
92
+ "title": "Id",
93
+ "x-es": {
94
+ "type": "keyword",
95
+ "ignore_above": 8191
96
+ },
97
+ "type": "string"
98
+ },
99
+ "source": {
100
+ "title": "Source",
101
+ "x-es": {
102
+ "type": "keyword",
103
+ "ignore_above": 8191
104
+ },
105
+ "type": "string"
106
+ }
107
+ },
108
+ "required": [
109
+ "name",
110
+ "id",
111
+ "source"
112
+ ]
113
+ }
114
+ }
115
+ },
116
+ "required": [
117
+ "name",
118
+ "id",
119
+ "source",
120
+ "affiliations"
121
+ ]
122
+ }
123
+ },
124
+ "affiliations": {
125
+ "title": "Affiliations",
126
+ "type": "array",
127
+ "items": {
128
+ "title": "Affiliation",
129
+ "type": "object",
130
+ "properties": {
131
+ "name": {
132
+ "title": "Name",
133
+ "x-es": {
134
+ "fields": {
135
+ "lower": {
136
+ "normalizer": "lowercase_asciifolding",
137
+ "type": "keyword"
138
+ },
139
+ "keyword": {
140
+ "type": "keyword"
141
+ }
142
+ },
143
+ "type": "keyword"
144
+ },
145
+ "type": "string"
146
+ },
147
+ "id": {
148
+ "title": "Id",
149
+ "x-es": {
150
+ "type": "keyword",
151
+ "ignore_above": 8191
152
+ },
153
+ "type": "string"
154
+ },
155
+ "source": {
156
+ "title": "Source",
157
+ "x-es": {
158
+ "type": "keyword",
159
+ "ignore_above": 8191
160
+ },
161
+ "type": "string"
162
+ }
163
+ },
164
+ "required": [
165
+ "name",
166
+ "id",
167
+ "source"
168
+ ]
169
+ }
170
+ },
171
+ "subjects": {
172
+ "title": "Subjects",
173
+ "x-es": {
174
+ "fields": {
175
+ "keyword": {
176
+ "ignore_above": 8191,
177
+ "type": "keyword"
178
+ }
179
+ }
180
+ },
181
+ "type": "array",
182
+ "items": {
183
+ "type": "string"
184
+ }
185
+ },
186
+ "keywords": {
187
+ "title": "Keywords",
188
+ "x-es": {
189
+ "type": "keyword",
190
+ "ignore_above": 8191
191
+ },
192
+ "type": "array",
193
+ "items": {
194
+ "type": "string"
195
+ }
196
+ },
197
+ "publication_date": {
198
+ "title": "Publication Date",
199
+ "x-es": {
200
+ "type": "date"
201
+ },
202
+ "type": "string",
203
+ "format": "date-time"
204
+ },
205
+ "languages": {
206
+ "title": "Languages",
207
+ "x-es": {
208
+ "type": "keyword",
209
+ "ignore_above": 8191
210
+ },
211
+ "type": "array",
212
+ "items": {
213
+ "type": "string"
214
+ }
215
+ },
216
+ "publishers": {
217
+ "title": "Publishers",
218
+ "x-es": {
219
+ "type": "keyword",
220
+ "ignore_above": 8191
221
+ },
222
+ "type": "array",
223
+ "items": {
224
+ "type": "string"
225
+ }
226
+ },
227
+ "url_refs": {
228
+ "title": "Url Refs",
229
+ "x-es": {
230
+ "type": "keyword",
231
+ "ignore_above": 8191
232
+ },
233
+ "type": "array",
234
+ "items": {
235
+ "type": "string"
236
+ }
237
+ },
238
+ "references": {
239
+ "title": "References",
240
+ "type": "array",
241
+ "items": {
242
+ "title": "Identifier",
243
+ "type": "object",
244
+ "properties": {
245
+ "type": {
246
+ "title": "Type",
247
+ "x-es": {
248
+ "type": "keyword",
249
+ "ignore_above": 8191
250
+ },
251
+ "type": "string"
252
+ },
253
+ "value": {
254
+ "title": "Value",
255
+ "x-es": {
256
+ "type": "keyword",
257
+ "ignore_above": 8191
258
+ },
259
+ "type": "string"
260
+ },
261
+ "_name": {
262
+ "title": " Name",
263
+ "x-es": {
264
+ "type": "keyword",
265
+ "ignore_above": 8191
266
+ },
267
+ "type": "string"
268
+ }
269
+ },
270
+ "required": [
271
+ "type",
272
+ "value",
273
+ "_name"
274
+ ]
275
+ }
276
+ },
277
+ "advanced": {
278
+ "title": "Advanced",
279
+ "additionalProperties": {},
280
+ "properties": {},
281
+ "type": "object"
282
+ },
283
+ "analytics": {
284
+ "title": "Analytics",
285
+ "additionalProperties": {},
286
+ "properties": {},
287
+ "type": "object"
288
+ },
289
+ "logs": {
290
+ "title": "Logs",
291
+ "type": "array",
292
+ "items": {
293
+ "title": "Log",
294
+ "type": "object",
295
+ "properties": {
296
+ "agent": {
297
+ "title": "Agent",
298
+ "x-es": {
299
+ "type": "keyword",
300
+ "ignore_above": 8191
301
+ },
302
+ "type": "string"
303
+ },
304
+ "type": {
305
+ "title": "Type",
306
+ "x-es": {
307
+ "type": "keyword",
308
+ "ignore_above": 8191
309
+ },
310
+ "type": "string"
311
+ },
312
+ "comment": {
313
+ "title": "Comment",
314
+ "type": "string"
315
+ },
316
+ "date": {
317
+ "title": "Date",
318
+ "x-es": {
319
+ "type": "date"
320
+ },
321
+ "type": "string",
322
+ "format": "date-time"
323
+ }
324
+ },
325
+ "required": [
326
+ "agent",
327
+ "type",
328
+ "comment",
329
+ "date"
330
+ ]
331
+ }
332
+ }
333
+ },
334
+ "required": [
335
+ "title",
336
+ "abstract",
337
+ "authors",
338
+ "affiliations",
339
+ "subjects",
340
+ "keywords",
341
+ "publication_date",
342
+ "languages",
343
+ "publishers",
344
+ "url_refs",
345
+ "references",
346
+ "advanced",
347
+ "analytics",
348
+ "logs"
349
+ ]
350
+ },
351
+ "file-info": {
352
+ "title": "FileInfoObject",
353
+ "type": "object",
354
+ "properties": {
355
+ "filename": {
356
+ "title": "Filename",
357
+ "x-es": {
358
+ "type": "keyword",
359
+ "ignore_above": 8191
360
+ },
361
+ "type": "string"
362
+ }
363
+ },
364
+ "required": [
365
+ "filename"
366
+ ]
367
+ },
368
+ "main-text": {
369
+ "title": "Main-Text",
370
+ "type": "array",
371
+ "items": {
372
+ "anyOf": [
373
+ {
374
+ "title": "Ref",
375
+ "type": "object",
376
+ "properties": {
377
+ "name": {
378
+ "title": "Name",
379
+ "type": "string"
380
+ },
381
+ "type": {
382
+ "title": "Type",
383
+ "type": "string"
384
+ },
385
+ "__ref": {
386
+ "title": " Ref",
387
+ "type": "string"
388
+ }
389
+ },
390
+ "required": [
391
+ "name",
392
+ "type",
393
+ "__ref"
394
+ ]
395
+ },
396
+ {
397
+ "title": "BaseList",
398
+ "type": "object",
399
+ "properties": {
400
+ "data": {
401
+ "title": "Data",
402
+ "type": "array",
403
+ "items": {
404
+ "title": "ListItem",
405
+ "type": "object",
406
+ "properties": {
407
+ "text": {
408
+ "title": "Text",
409
+ "type": "string"
410
+ },
411
+ "type": {
412
+ "title": "Type",
413
+ "x-es": {
414
+ "type": "keyword",
415
+ "ignore_above": 8191
416
+ },
417
+ "type": "string"
418
+ },
419
+ "name": {
420
+ "title": "Name",
421
+ "x-es": {
422
+ "type": "keyword",
423
+ "ignore_above": 8191
424
+ },
425
+ "type": "string"
426
+ },
427
+ "font": {
428
+ "title": "Font",
429
+ "type": "string"
430
+ },
431
+ "prov": {
432
+ "title": "Prov",
433
+ "type": "array",
434
+ "items": {
435
+ "title": "Prov",
436
+ "type": "object",
437
+ "properties": {
438
+ "bbox": {
439
+ "title": "Bbox",
440
+ "type": "array",
441
+ "items": [
442
+ {
443
+ "type": "number"
444
+ },
445
+ {
446
+ "type": "number"
447
+ },
448
+ {
449
+ "type": "number"
450
+ },
451
+ {
452
+ "type": "number"
453
+ }
454
+ ]
455
+ },
456
+ "page": {
457
+ "title": "Page",
458
+ "type": "integer"
459
+ },
460
+ "span": {
461
+ "title": "Span",
462
+ "type": "array",
463
+ "items": [
464
+ {
465
+ "type": "integer"
466
+ },
467
+ {
468
+ "type": "integer"
469
+ }
470
+ ]
471
+ }
472
+ },
473
+ "required": [
474
+ "bbox",
475
+ "page",
476
+ "span"
477
+ ]
478
+ }
479
+ },
480
+ "identifier": {
481
+ "title": "Identifier",
482
+ "type": "string"
483
+ }
484
+ },
485
+ "required": [
486
+ "text",
487
+ "type",
488
+ "identifier"
489
+ ]
490
+ }
491
+ },
492
+ "name": {
493
+ "title": "Name",
494
+ "type": "string"
495
+ },
496
+ "prov": {
497
+ "title": "Prov",
498
+ "type": "array",
499
+ "items": {
500
+ "title": "Prov",
501
+ "type": "object",
502
+ "properties": {
503
+ "bbox": {
504
+ "title": "Bbox",
505
+ "type": "array",
506
+ "items": [
507
+ {
508
+ "type": "number"
509
+ },
510
+ {
511
+ "type": "number"
512
+ },
513
+ {
514
+ "type": "number"
515
+ },
516
+ {
517
+ "type": "number"
518
+ }
519
+ ]
520
+ },
521
+ "page": {
522
+ "title": "Page",
523
+ "type": "integer"
524
+ },
525
+ "span": {
526
+ "title": "Span",
527
+ "type": "array",
528
+ "items": [
529
+ {
530
+ "type": "integer"
531
+ },
532
+ {
533
+ "type": "integer"
534
+ }
535
+ ]
536
+ }
537
+ },
538
+ "required": [
539
+ "bbox",
540
+ "page",
541
+ "span"
542
+ ]
543
+ }
544
+ },
545
+ "type": {
546
+ "title": "Type",
547
+ "type": "string"
548
+ }
549
+ },
550
+ "required": [
551
+ "data",
552
+ "type"
553
+ ]
554
+ },
555
+ {
556
+ "title": "BaseText",
557
+ "type": "object",
558
+ "properties": {
559
+ "text": {
560
+ "title": "Text",
561
+ "type": "string"
562
+ },
563
+ "type": {
564
+ "title": "Type",
565
+ "x-es": {
566
+ "type": "keyword",
567
+ "ignore_above": 8191
568
+ },
569
+ "type": "string"
570
+ },
571
+ "name": {
572
+ "title": "Name",
573
+ "x-es": {
574
+ "type": "keyword",
575
+ "ignore_above": 8191
576
+ },
577
+ "type": "string"
578
+ },
579
+ "font": {
580
+ "title": "Font",
581
+ "type": "string"
582
+ },
583
+ "prov": {
584
+ "title": "Prov",
585
+ "type": "array",
586
+ "items": {
587
+ "title": "Prov",
588
+ "type": "object",
589
+ "properties": {
590
+ "bbox": {
591
+ "title": "Bbox",
592
+ "type": "array",
593
+ "items": [
594
+ {
595
+ "type": "number"
596
+ },
597
+ {
598
+ "type": "number"
599
+ },
600
+ {
601
+ "type": "number"
602
+ },
603
+ {
604
+ "type": "number"
605
+ }
606
+ ]
607
+ },
608
+ "page": {
609
+ "title": "Page",
610
+ "type": "integer"
611
+ },
612
+ "span": {
613
+ "title": "Span",
614
+ "type": "array",
615
+ "items": [
616
+ {
617
+ "type": "integer"
618
+ },
619
+ {
620
+ "type": "integer"
621
+ }
622
+ ]
623
+ }
624
+ },
625
+ "required": [
626
+ "bbox",
627
+ "page",
628
+ "span"
629
+ ]
630
+ }
631
+ }
632
+ },
633
+ "required": [
634
+ "text",
635
+ "type"
636
+ ]
637
+ }
638
+ ]
639
+ }
640
+ },
641
+ "figures": {
642
+ "title": "Figures",
643
+ "type": "array",
644
+ "items": {
645
+ "title": "BaseCell",
646
+ "type": "object",
647
+ "properties": {
648
+ "bounding-box": {
649
+ "title": "BoundingBoxContainer",
650
+ "type": "object",
651
+ "properties": {
652
+ "min": {
653
+ "title": "Min",
654
+ "type": "array",
655
+ "items": [
656
+ {
657
+ "type": "number"
658
+ },
659
+ {
660
+ "type": "number"
661
+ },
662
+ {
663
+ "type": "number"
664
+ },
665
+ {
666
+ "type": "number"
667
+ }
668
+ ]
669
+ },
670
+ "max": {
671
+ "title": "Max",
672
+ "type": "array",
673
+ "items": [
674
+ {
675
+ "type": "number"
676
+ },
677
+ {
678
+ "type": "number"
679
+ },
680
+ {
681
+ "type": "number"
682
+ },
683
+ {
684
+ "type": "number"
685
+ }
686
+ ]
687
+ }
688
+ },
689
+ "required": [
690
+ "min",
691
+ "max"
692
+ ]
693
+ },
694
+ "cells": {
695
+ "title": "CellsContainer",
696
+ "type": "object",
697
+ "properties": {
698
+ "data": {
699
+ "title": "Data",
700
+ "type": "array",
701
+ "items": {
702
+ "type": "array",
703
+ "items": [
704
+ {
705
+ "type": "number"
706
+ },
707
+ {
708
+ "type": "number"
709
+ },
710
+ {
711
+ "type": "number"
712
+ },
713
+ {
714
+ "type": "number"
715
+ },
716
+ {
717
+ "type": "string"
718
+ },
719
+ {
720
+ "type": "string"
721
+ }
722
+ ]
723
+ }
724
+ },
725
+ "header": {
726
+ "title": "Header",
727
+ "default": [
728
+ "x0",
729
+ "y0",
730
+ "x1",
731
+ "y1",
732
+ "font",
733
+ "text"
734
+ ],
735
+ "type": "array",
736
+ "items": [
737
+ {
738
+ "enum": [
739
+ "x0"
740
+ ],
741
+ "type": "string"
742
+ },
743
+ {
744
+ "enum": [
745
+ "y0"
746
+ ],
747
+ "type": "string"
748
+ },
749
+ {
750
+ "enum": [
751
+ "x1"
752
+ ],
753
+ "type": "string"
754
+ },
755
+ {
756
+ "enum": [
757
+ "y1"
758
+ ],
759
+ "type": "string"
760
+ },
761
+ {
762
+ "enum": [
763
+ "font"
764
+ ],
765
+ "type": "string"
766
+ },
767
+ {
768
+ "enum": [
769
+ "text"
770
+ ],
771
+ "type": "string"
772
+ }
773
+ ]
774
+ }
775
+ },
776
+ "required": [
777
+ "data"
778
+ ]
779
+ },
780
+ "prov": {
781
+ "title": "Prov",
782
+ "type": "array",
783
+ "items": {
784
+ "title": "Prov",
785
+ "type": "object",
786
+ "properties": {
787
+ "bbox": {
788
+ "title": "Bbox",
789
+ "type": "array",
790
+ "items": [
791
+ {
792
+ "type": "number"
793
+ },
794
+ {
795
+ "type": "number"
796
+ },
797
+ {
798
+ "type": "number"
799
+ },
800
+ {
801
+ "type": "number"
802
+ }
803
+ ]
804
+ },
805
+ "page": {
806
+ "title": "Page",
807
+ "type": "integer"
808
+ },
809
+ "span": {
810
+ "title": "Span",
811
+ "type": "array",
812
+ "items": [
813
+ {
814
+ "type": "integer"
815
+ },
816
+ {
817
+ "type": "integer"
818
+ }
819
+ ]
820
+ }
821
+ },
822
+ "required": [
823
+ "bbox",
824
+ "page",
825
+ "span"
826
+ ]
827
+ }
828
+ },
829
+ "text": {
830
+ "title": "Text",
831
+ "type": "string"
832
+ },
833
+ "type": {
834
+ "title": "Type",
835
+ "x-es": {
836
+ "type": "keyword",
837
+ "ignore_above": 8191
838
+ },
839
+ "type": "string"
840
+ }
841
+ },
842
+ "required": [
843
+ "bounding-box",
844
+ "cells",
845
+ "text",
846
+ "type"
847
+ ]
848
+ }
849
+ },
850
+ "tables": {
851
+ "title": "Tables",
852
+ "type": "array",
853
+ "items": {
854
+ "title": "Table",
855
+ "type": "object",
856
+ "properties": {
857
+ "num_cols": {
858
+ "title": "Num Cols",
859
+ "type": "integer"
860
+ },
861
+ "num_rows": {
862
+ "title": "Num Rows",
863
+ "type": "integer"
864
+ },
865
+ "bounding_box": {
866
+ "title": "BoundingBoxContainer",
867
+ "type": "object",
868
+ "properties": {
869
+ "min": {
870
+ "title": "Min",
871
+ "type": "array",
872
+ "items": [
873
+ {
874
+ "type": "number"
875
+ },
876
+ {
877
+ "type": "number"
878
+ },
879
+ {
880
+ "type": "number"
881
+ },
882
+ {
883
+ "type": "number"
884
+ }
885
+ ]
886
+ },
887
+ "max": {
888
+ "title": "Max",
889
+ "type": "array",
890
+ "items": [
891
+ {
892
+ "type": "number"
893
+ },
894
+ {
895
+ "type": "number"
896
+ },
897
+ {
898
+ "type": "number"
899
+ },
900
+ {
901
+ "type": "number"
902
+ }
903
+ ]
904
+ }
905
+ },
906
+ "required": [
907
+ "min",
908
+ "max"
909
+ ]
910
+ },
911
+ "cells": {
912
+ "title": "CellsContainer",
913
+ "type": "object",
914
+ "properties": {
915
+ "data": {
916
+ "title": "Data",
917
+ "type": "array",
918
+ "items": {
919
+ "type": "array",
920
+ "items": [
921
+ {
922
+ "type": "number"
923
+ },
924
+ {
925
+ "type": "number"
926
+ },
927
+ {
928
+ "type": "number"
929
+ },
930
+ {
931
+ "type": "number"
932
+ },
933
+ {
934
+ "type": "string"
935
+ },
936
+ {
937
+ "type": "string"
938
+ }
939
+ ]
940
+ }
941
+ },
942
+ "header": {
943
+ "title": "Header",
944
+ "default": [
945
+ "x0",
946
+ "y0",
947
+ "x1",
948
+ "y1",
949
+ "font",
950
+ "text"
951
+ ],
952
+ "type": "array",
953
+ "items": [
954
+ {
955
+ "enum": [
956
+ "x0"
957
+ ],
958
+ "type": "string"
959
+ },
960
+ {
961
+ "enum": [
962
+ "y0"
963
+ ],
964
+ "type": "string"
965
+ },
966
+ {
967
+ "enum": [
968
+ "x1"
969
+ ],
970
+ "type": "string"
971
+ },
972
+ {
973
+ "enum": [
974
+ "y1"
975
+ ],
976
+ "type": "string"
977
+ },
978
+ {
979
+ "enum": [
980
+ "font"
981
+ ],
982
+ "type": "string"
983
+ },
984
+ {
985
+ "enum": [
986
+ "text"
987
+ ],
988
+ "type": "string"
989
+ }
990
+ ]
991
+ }
992
+ },
993
+ "required": [
994
+ "data"
995
+ ]
996
+ },
997
+ "data": {
998
+ "title": "Data",
999
+ "type": "array",
1000
+ "items": {
1001
+ "type": "array",
1002
+ "items": {
1003
+ "title": "TableCell",
1004
+ "type": "object",
1005
+ "properties": {
1006
+ "bbox": {
1007
+ "title": "Bbox",
1008
+ "type": "array",
1009
+ "items": [
1010
+ {
1011
+ "type": "number"
1012
+ },
1013
+ {
1014
+ "type": "number"
1015
+ },
1016
+ {
1017
+ "type": "number"
1018
+ },
1019
+ {
1020
+ "type": "number"
1021
+ }
1022
+ ]
1023
+ },
1024
+ "spans": {
1025
+ "title": "Spans",
1026
+ "type": "array",
1027
+ "items": {
1028
+ "type": "array",
1029
+ "items": [
1030
+ {
1031
+ "type": "integer"
1032
+ },
1033
+ {
1034
+ "type": "integer"
1035
+ }
1036
+ ]
1037
+ }
1038
+ },
1039
+ "text": {
1040
+ "title": "Text",
1041
+ "type": "string"
1042
+ },
1043
+ "type": {
1044
+ "title": "Type",
1045
+ "type": "string"
1046
+ }
1047
+ },
1048
+ "required": [
1049
+ "text",
1050
+ "type"
1051
+ ]
1052
+ }
1053
+ }
1054
+ },
1055
+ "model": {
1056
+ "title": "Model",
1057
+ "type": "string"
1058
+ },
1059
+ "prov": {
1060
+ "title": "Prov",
1061
+ "type": "object",
1062
+ "properties": {
1063
+ "bbox": {
1064
+ "title": "Bbox",
1065
+ "type": "array",
1066
+ "items": [
1067
+ {
1068
+ "type": "number"
1069
+ },
1070
+ {
1071
+ "type": "number"
1072
+ },
1073
+ {
1074
+ "type": "number"
1075
+ },
1076
+ {
1077
+ "type": "number"
1078
+ }
1079
+ ]
1080
+ },
1081
+ "page": {
1082
+ "title": "Page",
1083
+ "type": "integer"
1084
+ },
1085
+ "span": {
1086
+ "title": "Span",
1087
+ "type": "array",
1088
+ "items": [
1089
+ {
1090
+ "type": "integer"
1091
+ },
1092
+ {
1093
+ "type": "integer"
1094
+ }
1095
+ ]
1096
+ }
1097
+ },
1098
+ "required": [
1099
+ "bbox",
1100
+ "page",
1101
+ "span"
1102
+ ]
1103
+ },
1104
+ "text": {
1105
+ "title": "Text",
1106
+ "type": "string"
1107
+ },
1108
+ "type": {
1109
+ "title": "Type",
1110
+ "type": "string"
1111
+ }
1112
+ },
1113
+ "required": [
1114
+ "num_cols",
1115
+ "num_rows",
1116
+ "data",
1117
+ "text",
1118
+ "type"
1119
+ ]
1120
+ }
1121
+ }
1122
+ },
1123
+ "required": [
1124
+ "name",
1125
+ "description",
1126
+ "file-info",
1127
+ "main-text"
1128
+ ]
1129
+ }