elementary-assertions 1.0.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. package/CHANGELOG.md +353 -0
  2. package/LICENSE +21 -0
  3. package/README.md +211 -0
  4. package/bin/elementary-assertions.js +8 -0
  5. package/docs/DEV_TOOLING.md +98 -0
  6. package/docs/NPM_RELEASE.md +177 -0
  7. package/docs/OPERATIONAL.md +159 -0
  8. package/docs/RELEASE_NOTES_TEMPLATE.md +37 -0
  9. package/docs/REPO_WORKFLOWS.md +48 -0
  10. package/package.json +46 -0
  11. package/src/core/accepted-annotations.js +44 -0
  12. package/src/core/assertions.js +2304 -0
  13. package/src/core/determinism.js +95 -0
  14. package/src/core/diagnostics.js +496 -0
  15. package/src/core/ids.js +9 -0
  16. package/src/core/mention-builder.js +272 -0
  17. package/src/core/mention-evidence.js +52 -0
  18. package/src/core/mention-head-resolution.js +108 -0
  19. package/src/core/mention-materialization.js +31 -0
  20. package/src/core/mentions.js +149 -0
  21. package/src/core/output.js +296 -0
  22. package/src/core/projection.js +192 -0
  23. package/src/core/roles.js +164 -0
  24. package/src/core/strings.js +7 -0
  25. package/src/core/tokens.js +53 -0
  26. package/src/core/upstream.js +31 -0
  27. package/src/index.js +6 -0
  28. package/src/render/index.js +5 -0
  29. package/src/render/layouts/compact.js +10 -0
  30. package/src/render/layouts/meaning.js +7 -0
  31. package/src/render/layouts/readable.js +7 -0
  32. package/src/render/layouts/table.js +7 -0
  33. package/src/render/render.js +931 -0
  34. package/src/run.js +278 -0
  35. package/src/schema/seed.elementary-assertions.schema.json +1751 -0
  36. package/src/tools/cli.js +158 -0
  37. package/src/tools/index.js +6 -0
  38. package/src/tools/io.js +55 -0
  39. package/src/validate/ajv.js +20 -0
  40. package/src/validate/coverage.js +215 -0
  41. package/src/validate/determinism.js +115 -0
  42. package/src/validate/diagnostics-strict.js +392 -0
  43. package/src/validate/errors.js +19 -0
  44. package/src/validate/index.js +20 -0
  45. package/src/validate/integrity.js +41 -0
  46. package/src/validate/invariants.js +157 -0
  47. package/src/validate/references.js +110 -0
  48. package/src/validate/schema.js +50 -0
@@ -0,0 +1,1751 @@
1
+ {
2
+ "$schema": "https://json-schema.org/draft/2020-12/schema",
3
+ "$id": "https://elementary-assertions/schemas/seed.elementary-assertions.schema.json",
4
+ "title": "Elementary Assertions Seed",
5
+ "type": "object",
6
+ "additionalProperties": false,
7
+ "required": [
8
+ "seed_id",
9
+ "stage",
10
+ "index_basis",
11
+ "canonical_text",
12
+ "segments",
13
+ "tokens",
14
+ "mentions",
15
+ "assertions",
16
+ "relation_projection",
17
+ "accepted_annotations",
18
+ "wiki_title_evidence",
19
+ "diagnostics",
20
+ "coverage",
21
+ "sources"
22
+ ],
23
+ "properties": {
24
+ "schema_version": {
25
+ "type": "string",
26
+ "minLength": 1
27
+ },
28
+ "seed_id": {
29
+ "type": "string",
30
+ "minLength": 1
31
+ },
32
+ "stage": {
33
+ "type": "string",
34
+ "const": "elementary_assertions"
35
+ },
36
+ "index_basis": {
37
+ "type": "object",
38
+ "additionalProperties": false,
39
+ "required": [
40
+ "text_field",
41
+ "span_unit"
42
+ ],
43
+ "properties": {
44
+ "text_field": {
45
+ "type": "string",
46
+ "const": "canonical_text"
47
+ },
48
+ "span_unit": {
49
+ "type": "string",
50
+ "enum": [
51
+ "utf16_code_units"
52
+ ]
53
+ }
54
+ }
55
+ },
56
+ "canonical_text": {
57
+ "type": "string"
58
+ },
59
+ "segments": {
60
+ "type": "array",
61
+ "minItems": 1,
62
+ "items": {
63
+ "$ref": "#/$defs/segment"
64
+ }
65
+ },
66
+ "tokens": {
67
+ "type": "array",
68
+ "minItems": 1,
69
+ "items": {
70
+ "$ref": "#/$defs/token"
71
+ }
72
+ },
73
+ "mentions": {
74
+ "type": "array",
75
+ "minItems": 1,
76
+ "items": {
77
+ "$ref": "#/$defs/mention"
78
+ }
79
+ },
80
+ "assertions": {
81
+ "type": "array",
82
+ "minItems": 0,
83
+ "items": {
84
+ "$ref": "#/$defs/assertion"
85
+ }
86
+ },
87
+ "relation_projection": {
88
+ "$ref": "#/$defs/relation_projection"
89
+ },
90
+ "accepted_annotations": {
91
+ "type": "array",
92
+ "items": {
93
+ "$ref": "#/$defs/accepted_annotation_summary"
94
+ }
95
+ },
96
+ "wiki_title_evidence": {
97
+ "$ref": "#/$defs/wiki_title_evidence"
98
+ },
99
+ "diagnostics": {
100
+ "$ref": "#/$defs/diagnostics"
101
+ },
102
+ "coverage": {
103
+ "type": "object",
104
+ "additionalProperties": false,
105
+ "required": [
106
+ "primary_mention_ids",
107
+ "covered_primary_mention_ids",
108
+ "uncovered_primary_mention_ids",
109
+ "unresolved"
110
+ ],
111
+ "properties": {
112
+ "primary_mention_ids": {
113
+ "type": "array",
114
+ "items": {
115
+ "type": "string",
116
+ "minLength": 1
117
+ }
118
+ },
119
+ "covered_primary_mention_ids": {
120
+ "type": "array",
121
+ "items": {
122
+ "type": "string",
123
+ "minLength": 1
124
+ }
125
+ },
126
+ "uncovered_primary_mention_ids": {
127
+ "type": "array",
128
+ "items": {
129
+ "type": "string",
130
+ "minLength": 1
131
+ }
132
+ },
133
+ "unresolved": {
134
+ "type": "array",
135
+ "items": {
136
+ "$ref": "#/$defs/unresolved_item"
137
+ }
138
+ }
139
+ }
140
+ },
141
+ "sources": {
142
+ "type": "object",
143
+ "additionalProperties": false,
144
+ "required": [
145
+ "inputs",
146
+ "pipeline"
147
+ ],
148
+ "properties": {
149
+ "inputs": {
150
+ "type": "array",
151
+ "minItems": 1,
152
+ "items": {
153
+ "$ref": "#/$defs/source_input"
154
+ }
155
+ },
156
+ "pipeline": {
157
+ "$ref": "#/$defs/source_pipeline"
158
+ }
159
+ }
160
+ }
161
+ },
162
+ "$defs": {
163
+ "span": {
164
+ "type": "object",
165
+ "additionalProperties": false,
166
+ "required": [
167
+ "start",
168
+ "end"
169
+ ],
170
+ "properties": {
171
+ "start": {
172
+ "type": "integer",
173
+ "minimum": 0
174
+ },
175
+ "end": {
176
+ "type": "integer",
177
+ "minimum": 0
178
+ }
179
+ }
180
+ },
181
+ "segment": {
182
+ "type": "object",
183
+ "additionalProperties": false,
184
+ "required": [
185
+ "id",
186
+ "span",
187
+ "token_range"
188
+ ],
189
+ "properties": {
190
+ "id": {
191
+ "type": "string",
192
+ "minLength": 1
193
+ },
194
+ "span": {
195
+ "$ref": "#/$defs/span"
196
+ },
197
+ "token_range": {
198
+ "type": "object",
199
+ "additionalProperties": false,
200
+ "required": [
201
+ "start",
202
+ "end"
203
+ ],
204
+ "properties": {
205
+ "start": {
206
+ "type": "integer",
207
+ "minimum": 0
208
+ },
209
+ "end": {
210
+ "type": "integer",
211
+ "minimum": 0
212
+ }
213
+ }
214
+ }
215
+ }
216
+ },
217
+ "token": {
218
+ "type": "object",
219
+ "additionalProperties": false,
220
+ "required": [
221
+ "id",
222
+ "i",
223
+ "segment_id",
224
+ "span",
225
+ "surface"
226
+ ],
227
+ "properties": {
228
+ "id": {
229
+ "type": "string",
230
+ "minLength": 1
231
+ },
232
+ "i": {
233
+ "type": "integer",
234
+ "minimum": 0
235
+ },
236
+ "segment_id": {
237
+ "type": "string",
238
+ "minLength": 1
239
+ },
240
+ "span": {
241
+ "$ref": "#/$defs/span"
242
+ },
243
+ "surface": {
244
+ "type": "string"
245
+ },
246
+ "normalized": {
247
+ "type": "string"
248
+ },
249
+ "flags": {
250
+ "type": "object",
251
+ "additionalProperties": true
252
+ },
253
+ "joiner": {
254
+ "type": "object",
255
+ "additionalProperties": true
256
+ },
257
+ "pos": {
258
+ "type": "object",
259
+ "additionalProperties": false,
260
+ "required": [
261
+ "tag"
262
+ ],
263
+ "properties": {
264
+ "tag": {
265
+ "type": "string",
266
+ "minLength": 1
267
+ },
268
+ "coarse": {
269
+ "type": "string",
270
+ "minLength": 1
271
+ }
272
+ }
273
+ },
274
+ "lexicon": {
275
+ "type": "object",
276
+ "additionalProperties": false,
277
+ "properties": {
278
+ "wikipedia_title_index": {
279
+ "type": "object",
280
+ "additionalProperties": true
281
+ }
282
+ }
283
+ }
284
+ }
285
+ },
286
+ "mention": {
287
+ "type": "object",
288
+ "additionalProperties": false,
289
+ "required": [
290
+ "id",
291
+ "kind",
292
+ "priority",
293
+ "token_ids",
294
+ "head_token_id",
295
+ "span",
296
+ "segment_id",
297
+ "is_primary"
298
+ ],
299
+ "properties": {
300
+ "id": {
301
+ "type": "string",
302
+ "minLength": 1
303
+ },
304
+ "kind": {
305
+ "type": "string",
306
+ "enum": [
307
+ "token",
308
+ "mwe",
309
+ "chunk"
310
+ ]
311
+ },
312
+ "priority": {
313
+ "type": "integer",
314
+ "minimum": 0
315
+ },
316
+ "token_ids": {
317
+ "type": "array",
318
+ "minItems": 1,
319
+ "items": {
320
+ "type": "string",
321
+ "minLength": 1
322
+ }
323
+ },
324
+ "head_token_id": {
325
+ "type": "string",
326
+ "minLength": 1
327
+ },
328
+ "span": {
329
+ "$ref": "#/$defs/span"
330
+ },
331
+ "segment_id": {
332
+ "type": "string",
333
+ "minLength": 1
334
+ },
335
+ "is_primary": {
336
+ "type": "boolean"
337
+ },
338
+ "provenance": {
339
+ "type": "object",
340
+ "additionalProperties": false,
341
+ "properties": {
342
+ "source_annotation_id": {
343
+ "type": "string",
344
+ "minLength": 1
345
+ },
346
+ "source_kind": {
347
+ "type": "string",
348
+ "enum": [
349
+ "mwe_materialized",
350
+ "token_fallback",
351
+ "mwe_alternative",
352
+ "chunk_accepted",
353
+ "token_shadow"
354
+ ]
355
+ },
356
+ "head_strategy": {
357
+ "type": "string",
358
+ "enum": [
359
+ "explicit",
360
+ "chunk_head",
361
+ "dependency_head",
362
+ "pos_fallback",
363
+ "unresolved"
364
+ ]
365
+ },
366
+ "lexicon_source": {
367
+ "type": "string",
368
+ "enum": [
369
+ "wikipedia-title-index"
370
+ ]
371
+ },
372
+ "lexicon_evidence": {
373
+ "type": "object",
374
+ "additionalProperties": true
375
+ }
376
+ }
377
+ }
378
+ }
379
+ },
380
+ "assertion": {
381
+ "type": "object",
382
+ "additionalProperties": false,
383
+ "required": [
384
+ "id",
385
+ "segment_id",
386
+ "predicate",
387
+ "arguments",
388
+ "modifiers",
389
+ "operators",
390
+ "evidence"
391
+ ],
392
+ "properties": {
393
+ "id": {
394
+ "type": "string",
395
+ "minLength": 1
396
+ },
397
+ "segment_id": {
398
+ "type": "string",
399
+ "minLength": 1
400
+ },
401
+ "predicate": {
402
+ "type": "object",
403
+ "additionalProperties": false,
404
+ "required": [
405
+ "mention_id",
406
+ "head_token_id"
407
+ ],
408
+ "properties": {
409
+ "mention_id": {
410
+ "type": "string",
411
+ "minLength": 1
412
+ },
413
+ "head_token_id": {
414
+ "type": "string",
415
+ "minLength": 1
416
+ },
417
+ "lemma": {
418
+ "type": "string"
419
+ }
420
+ }
421
+ },
422
+ "operators": {
423
+ "type": "array",
424
+ "items": {
425
+ "$ref": "#/$defs/operator"
426
+ }
427
+ },
428
+ "evidence": {
429
+ "type": "object",
430
+ "additionalProperties": false,
431
+ "required": [
432
+ "relation_evidence",
433
+ "token_ids"
434
+ ],
435
+ "properties": {
436
+ "relation_evidence": {
437
+ "type": "array",
438
+ "minItems": 1,
439
+ "items": {
440
+ "$ref": "#/$defs/relation_evidence_item"
441
+ }
442
+ },
443
+ "token_ids": {
444
+ "type": "array",
445
+ "minItems": 1,
446
+ "items": {
447
+ "type": "string",
448
+ "minLength": 1
449
+ }
450
+ },
451
+ "wiki_signals": {
452
+ "type": "object",
453
+ "additionalProperties": false,
454
+ "required": [
455
+ "mention_evidence"
456
+ ],
457
+ "properties": {
458
+ "mention_evidence": {
459
+ "type": "array",
460
+ "minItems": 1,
461
+ "items": {
462
+ "type": "object",
463
+ "additionalProperties": false,
464
+ "required": [
465
+ "mention_id",
466
+ "token_ids",
467
+ "evidence"
468
+ ],
469
+ "properties": {
470
+ "mention_id": {
471
+ "type": "string",
472
+ "minLength": 1
473
+ },
474
+ "token_ids": {
475
+ "type": "array",
476
+ "minItems": 1,
477
+ "items": {
478
+ "type": "string",
479
+ "minLength": 1
480
+ }
481
+ },
482
+ "evidence": {
483
+ "type": "object",
484
+ "additionalProperties": true
485
+ }
486
+ }
487
+ }
488
+ }
489
+ }
490
+ }
491
+ }
492
+ },
493
+ "diagnostics": {
494
+ "type": "object",
495
+ "additionalProperties": false,
496
+ "properties": {
497
+ "predicate_quality": {
498
+ "type": "string",
499
+ "enum": [
500
+ "ok",
501
+ "low"
502
+ ]
503
+ },
504
+ "predicate_class": {
505
+ "type": "string",
506
+ "enum": [
507
+ "lexical_verb",
508
+ "copula",
509
+ "auxiliary",
510
+ "preposition",
511
+ "nominal_head"
512
+ ]
513
+ },
514
+ "structural_fragment": {
515
+ "type": "boolean"
516
+ },
517
+ "slot_projection_choice": {
518
+ "type": "object",
519
+ "additionalProperties": false,
520
+ "required": [
521
+ "candidate_count",
522
+ "chosen_mention_id"
523
+ ],
524
+ "properties": {
525
+ "candidate_count": {
526
+ "type": "integer",
527
+ "minimum": 2
528
+ },
529
+ "chosen_mention_id": {
530
+ "type": "string",
531
+ "minLength": 1
532
+ }
533
+ }
534
+ },
535
+ "suppression_eligibility": {
536
+ "type": "object",
537
+ "additionalProperties": false,
538
+ "required": [
539
+ "eligible",
540
+ "failure_reason",
541
+ "candidate_class",
542
+ "segment_id",
543
+ "assertion_id",
544
+ "chosen_host_assertion_id",
545
+ "chosen_host_predicate",
546
+ "chosen_host_predicate_class",
547
+ "source_non_operator_token_ids",
548
+ "chosen_host_token_ids",
549
+ "missing_in_host_token_ids"
550
+ ],
551
+ "properties": {
552
+ "eligible": {
553
+ "type": "boolean"
554
+ },
555
+ "failure_reason": {
556
+ "type": [
557
+ "string",
558
+ "null"
559
+ ],
560
+ "enum": [
561
+ "no_host",
562
+ "no_containment",
563
+ "has_core_slots",
564
+ null
565
+ ]
566
+ },
567
+ "candidate_class": {
568
+ "type": "string",
569
+ "enum": [
570
+ "preposition",
571
+ "nominal_head",
572
+ "auxiliary"
573
+ ]
574
+ },
575
+ "segment_id": {
576
+ "type": "string",
577
+ "minLength": 1
578
+ },
579
+ "assertion_id": {
580
+ "type": "string",
581
+ "minLength": 1
582
+ },
583
+ "chosen_host_assertion_id": {
584
+ "type": [
585
+ "string",
586
+ "null"
587
+ ]
588
+ },
589
+ "chosen_host_predicate": {
590
+ "type": [
591
+ "string",
592
+ "null"
593
+ ]
594
+ },
595
+ "chosen_host_predicate_class": {
596
+ "type": [
597
+ "string",
598
+ "null"
599
+ ]
600
+ },
601
+ "source_non_operator_token_ids": {
602
+ "type": "array",
603
+ "items": {
604
+ "type": "string",
605
+ "minLength": 1
606
+ }
607
+ },
608
+ "chosen_host_token_ids": {
609
+ "type": "array",
610
+ "items": {
611
+ "type": "string",
612
+ "minLength": 1
613
+ }
614
+ },
615
+ "missing_in_host_token_ids": {
616
+ "type": "array",
617
+ "items": {
618
+ "type": "string",
619
+ "minLength": 1
620
+ }
621
+ }
622
+ }
623
+ },
624
+ "suppressed_by": {
625
+ "$ref": "#/$defs/suppressed_by"
626
+ }
627
+ }
628
+ },
629
+ "arguments": {
630
+ "type": "array",
631
+ "items": {
632
+ "$ref": "#/$defs/role_entry"
633
+ }
634
+ },
635
+ "modifiers": {
636
+ "type": "array",
637
+ "items": {
638
+ "$ref": "#/$defs/role_entry"
639
+ }
640
+ }
641
+ }
642
+ },
643
+ "mention_id_list": {
644
+ "type": "array",
645
+ "items": {
646
+ "type": "string",
647
+ "minLength": 1
648
+ }
649
+ },
650
+ "slot_pair": {
651
+ "type": "object",
652
+ "additionalProperties": false,
653
+ "required": [
654
+ "role",
655
+ "mention_ids"
656
+ ],
657
+ "properties": {
658
+ "role": {
659
+ "type": "string",
660
+ "minLength": 1
661
+ },
662
+ "mention_ids": {
663
+ "$ref": "#/$defs/mention_id_list"
664
+ }
665
+ }
666
+ },
667
+ "operator": {
668
+ "type": "object",
669
+ "additionalProperties": false,
670
+ "required": [
671
+ "kind"
672
+ ],
673
+ "properties": {
674
+ "kind": {
675
+ "type": "string",
676
+ "enum": [
677
+ "modality",
678
+ "negation",
679
+ "coordination_group",
680
+ "compare",
681
+ "compare_gt",
682
+ "compare_lt",
683
+ "quantifier",
684
+ "control_inherit_subject",
685
+ "control_propagation"
686
+ ]
687
+ },
688
+ "value": {
689
+ "type": "string"
690
+ },
691
+ "token_id": {
692
+ "type": "string",
693
+ "minLength": 1
694
+ },
695
+ "group_id": {
696
+ "type": "string",
697
+ "minLength": 1
698
+ },
699
+ "evidence": {
700
+ "type": "array",
701
+ "items": {
702
+ "$ref": "#/$defs/relation_evidence_item"
703
+ }
704
+ }
705
+ },
706
+ "allOf": [
707
+ {
708
+ "if": {
709
+ "properties": {
710
+ "kind": {
711
+ "const": "modality"
712
+ }
713
+ }
714
+ },
715
+ "then": {
716
+ "required": [
717
+ "value"
718
+ ]
719
+ }
720
+ },
721
+ {
722
+ "if": {
723
+ "properties": {
724
+ "kind": {
725
+ "const": "negation"
726
+ }
727
+ }
728
+ },
729
+ "then": {
730
+ "required": [
731
+ "token_id"
732
+ ]
733
+ }
734
+ },
735
+ {
736
+ "if": {
737
+ "properties": {
738
+ "kind": {
739
+ "const": "coordination_group"
740
+ }
741
+ }
742
+ },
743
+ "then": {
744
+ "required": [
745
+ "group_id"
746
+ ]
747
+ }
748
+ },
749
+ {
750
+ "if": {
751
+ "properties": {
752
+ "kind": {
753
+ "enum": [
754
+ "compare",
755
+ "compare_gt",
756
+ "compare_lt",
757
+ "quantifier"
758
+ ]
759
+ }
760
+ }
761
+ },
762
+ "then": {
763
+ "required": [
764
+ "token_id"
765
+ ]
766
+ }
767
+ }
768
+ ]
769
+ },
770
+ "relation_projection": {
771
+ "type": "object",
772
+ "additionalProperties": false,
773
+ "required": [
774
+ "all_relations",
775
+ "projected_relations",
776
+ "dropped_relations"
777
+ ],
778
+ "properties": {
779
+ "all_relations": {
780
+ "type": "array",
781
+ "items": {
782
+ "$ref": "#/$defs/relation_projection_all_item"
783
+ }
784
+ },
785
+ "projected_relations": {
786
+ "type": "array",
787
+ "items": {
788
+ "$ref": "#/$defs/relation_projection_projected_item"
789
+ }
790
+ },
791
+ "dropped_relations": {
792
+ "type": "array",
793
+ "items": {
794
+ "$ref": "#/$defs/relation_projection_dropped_item"
795
+ }
796
+ }
797
+ }
798
+ },
799
+ "relation_projection_all_item": {
800
+ "type": "object",
801
+ "additionalProperties": false,
802
+ "required": [
803
+ "relation_id",
804
+ "label",
805
+ "segment_id",
806
+ "head_token_id",
807
+ "dep_token_id",
808
+ "head_primary_mention_id",
809
+ "dep_primary_mention_id",
810
+ "head_mention_ids",
811
+ "dep_mention_ids"
812
+ ],
813
+ "properties": {
814
+ "relation_id": {
815
+ "type": "string"
816
+ },
817
+ "label": {
818
+ "type": "string",
819
+ "minLength": 1
820
+ },
821
+ "segment_id": {
822
+ "type": "string",
823
+ "minLength": 1
824
+ },
825
+ "head_token_id": {
826
+ "type": "string",
827
+ "minLength": 1
828
+ },
829
+ "dep_token_id": {
830
+ "type": "string",
831
+ "minLength": 1
832
+ },
833
+ "head_primary_mention_id": {
834
+ "type": [
835
+ "string",
836
+ "null"
837
+ ]
838
+ },
839
+ "dep_primary_mention_id": {
840
+ "type": [
841
+ "string",
842
+ "null"
843
+ ]
844
+ },
845
+ "head_mention_ids": {
846
+ "$ref": "#/$defs/mention_id_list"
847
+ },
848
+ "dep_mention_ids": {
849
+ "$ref": "#/$defs/mention_id_list"
850
+ }
851
+ }
852
+ },
853
+ "relation_projection_projected_item": {
854
+ "type": "object",
855
+ "additionalProperties": false,
856
+ "required": [
857
+ "relation_id",
858
+ "label",
859
+ "segment_id",
860
+ "head_token_id",
861
+ "dep_token_id",
862
+ "head_mention_id",
863
+ "dep_mention_id"
864
+ ],
865
+ "properties": {
866
+ "relation_id": {
867
+ "type": "string"
868
+ },
869
+ "label": {
870
+ "type": "string",
871
+ "minLength": 1
872
+ },
873
+ "segment_id": {
874
+ "type": "string",
875
+ "minLength": 1
876
+ },
877
+ "head_token_id": {
878
+ "type": "string",
879
+ "minLength": 1
880
+ },
881
+ "dep_token_id": {
882
+ "type": "string",
883
+ "minLength": 1
884
+ },
885
+ "head_mention_id": {
886
+ "type": "string",
887
+ "minLength": 1
888
+ },
889
+ "dep_mention_id": {
890
+ "type": "string",
891
+ "minLength": 1
892
+ }
893
+ }
894
+ },
895
+ "relation_projection_dropped_item": {
896
+ "type": "object",
897
+ "additionalProperties": false,
898
+ "required": [
899
+ "relation_id",
900
+ "label",
901
+ "segment_id",
902
+ "reason",
903
+ "head_token_id",
904
+ "dep_token_id",
905
+ "head_primary_mention_id",
906
+ "dep_primary_mention_id"
907
+ ],
908
+ "properties": {
909
+ "relation_id": {
910
+ "type": "string"
911
+ },
912
+ "label": {
913
+ "type": "string",
914
+ "minLength": 1
915
+ },
916
+ "segment_id": {
917
+ "type": "string",
918
+ "minLength": 1
919
+ },
920
+ "reason": {
921
+ "type": "string",
922
+ "minLength": 1
923
+ },
924
+ "head_token_id": {
925
+ "type": "string",
926
+ "minLength": 1
927
+ },
928
+ "dep_token_id": {
929
+ "type": "string",
930
+ "minLength": 1
931
+ },
932
+ "head_primary_mention_id": {
933
+ "type": [
934
+ "string",
935
+ "null"
936
+ ]
937
+ },
938
+ "dep_primary_mention_id": {
939
+ "type": [
940
+ "string",
941
+ "null"
942
+ ]
943
+ }
944
+ }
945
+ },
946
+ "accepted_annotation_summary": {
947
+ "type": "object",
948
+ "additionalProperties": false,
949
+ "required": [
950
+ "id",
951
+ "kind",
952
+ "status",
953
+ "token_ids",
954
+ "source_names"
955
+ ],
956
+ "properties": {
957
+ "id": {
958
+ "type": "string"
959
+ },
960
+ "kind": {
961
+ "type": "string",
962
+ "minLength": 1
963
+ },
964
+ "status": {
965
+ "type": "string",
966
+ "minLength": 1
967
+ },
968
+ "label": {
969
+ "type": "string"
970
+ },
971
+ "token_ids": {
972
+ "$ref": "#/$defs/mention_id_list"
973
+ },
974
+ "span": {
975
+ "$ref": "#/$defs/span"
976
+ },
977
+ "source_names": {
978
+ "type": "array",
979
+ "items": {
980
+ "type": "string",
981
+ "minLength": 1
982
+ }
983
+ }
984
+ }
985
+ },
986
+ "diagnostics": {
987
+ "type": "object",
988
+ "additionalProperties": false,
989
+ "required": [
990
+ "token_wiki_signal_count",
991
+ "mentions_with_lexicon_evidence",
992
+ "assertions_with_wiki_signals",
993
+ "projected_relation_count",
994
+ "dropped_relation_count",
995
+ "subject_role_gaps",
996
+ "warnings"
997
+ ],
998
+ "properties": {
999
+ "token_wiki_signal_count": {
1000
+ "type": "integer",
1001
+ "minimum": 0
1002
+ },
1003
+ "mentions_with_lexicon_evidence": {
1004
+ "type": "integer",
1005
+ "minimum": 0
1006
+ },
1007
+ "assertions_with_wiki_signals": {
1008
+ "type": "integer",
1009
+ "minimum": 0
1010
+ },
1011
+ "projected_relation_count": {
1012
+ "type": "integer",
1013
+ "minimum": 0
1014
+ },
1015
+ "dropped_relation_count": {
1016
+ "type": "integer",
1017
+ "minimum": 0
1018
+ },
1019
+ "fragmentation": {
1020
+ "type": "object",
1021
+ "additionalProperties": false,
1022
+ "required": [
1023
+ "structural_fragment_count",
1024
+ "predicate_noise_index",
1025
+ "per_segment"
1026
+ ],
1027
+ "properties": {
1028
+ "structural_fragment_count": {
1029
+ "type": "integer",
1030
+ "minimum": 0
1031
+ },
1032
+ "predicate_noise_index": {
1033
+ "type": "number",
1034
+ "minimum": 0
1035
+ },
1036
+ "per_segment": {
1037
+ "type": "array",
1038
+ "items": {
1039
+ "type": "object",
1040
+ "additionalProperties": false,
1041
+ "required": [
1042
+ "segment_id",
1043
+ "predicate_assertion_count",
1044
+ "lexical_verb_count",
1045
+ "tolerated_auxiliary_count",
1046
+ "structural_fragment_count",
1047
+ "clause_fragmentation_warning"
1048
+ ],
1049
+ "properties": {
1050
+ "segment_id": {
1051
+ "type": "string",
1052
+ "minLength": 1
1053
+ },
1054
+ "predicate_assertion_count": {
1055
+ "type": "integer",
1056
+ "minimum": 0
1057
+ },
1058
+ "lexical_verb_count": {
1059
+ "type": "integer",
1060
+ "minimum": 0
1061
+ },
1062
+ "tolerated_auxiliary_count": {
1063
+ "type": "integer",
1064
+ "minimum": 0
1065
+ },
1066
+ "structural_fragment_count": {
1067
+ "type": "integer",
1068
+ "minimum": 0
1069
+ },
1070
+ "clause_fragmentation_warning": {
1071
+ "type": "boolean"
1072
+ }
1073
+ }
1074
+ }
1075
+ }
1076
+ }
1077
+ },
1078
+ "gap_signals": {
1079
+ "type": "object",
1080
+ "additionalProperties": false,
1081
+ "required": [
1082
+ "coordination_type_missing",
1083
+ "comparative_gap",
1084
+ "quantifier_scope_gap"
1085
+ ],
1086
+ "properties": {
1087
+ "coordination_type_missing": {
1088
+ "type": "boolean"
1089
+ },
1090
+ "comparative_gap": {
1091
+ "type": "boolean"
1092
+ },
1093
+ "quantifier_scope_gap": {
1094
+ "type": "boolean"
1095
+ }
1096
+ }
1097
+ },
1098
+ "coordination_groups": {
1099
+ "type": "array",
1100
+ "items": {
1101
+ "type": "object",
1102
+ "additionalProperties": false,
1103
+ "required": [
1104
+ "id",
1105
+ "member_assertion_ids"
1106
+ ],
1107
+ "properties": {
1108
+ "id": {
1109
+ "type": "string",
1110
+ "minLength": 1
1111
+ },
1112
+ "type": {
1113
+ "type": [
1114
+ "string",
1115
+ "null"
1116
+ ]
1117
+ },
1118
+ "member_assertion_ids": {
1119
+ "type": "array",
1120
+ "items": {
1121
+ "type": "string",
1122
+ "minLength": 1
1123
+ }
1124
+ }
1125
+ }
1126
+ }
1127
+ },
1128
+ "subject_role_gaps": {
1129
+ "type": "array",
1130
+ "items": {
1131
+ "type": "object",
1132
+ "additionalProperties": false,
1133
+ "required": [
1134
+ "segment_id",
1135
+ "assertion_id",
1136
+ "predicate_mention_id",
1137
+ "predicate_head_token_id",
1138
+ "reason",
1139
+ "evidence"
1140
+ ],
1141
+ "properties": {
1142
+ "segment_id": {
1143
+ "type": "string",
1144
+ "minLength": 1
1145
+ },
1146
+ "assertion_id": {
1147
+ "type": "string",
1148
+ "minLength": 1
1149
+ },
1150
+ "predicate_mention_id": {
1151
+ "type": "string",
1152
+ "minLength": 1
1153
+ },
1154
+ "predicate_head_token_id": {
1155
+ "type": "string",
1156
+ "minLength": 1
1157
+ },
1158
+ "reason": {
1159
+ "type": "string",
1160
+ "enum": [
1161
+ "missing_subject_role"
1162
+ ]
1163
+ },
1164
+ "evidence": {
1165
+ "type": "object",
1166
+ "additionalProperties": false,
1167
+ "required": [
1168
+ "token_ids",
1169
+ "upstream_relation_ids"
1170
+ ],
1171
+ "properties": {
1172
+ "token_ids": {
1173
+ "type": "array",
1174
+ "items": {
1175
+ "type": "string",
1176
+ "minLength": 1
1177
+ }
1178
+ },
1179
+ "upstream_relation_ids": {
1180
+ "type": "array",
1181
+ "items": {
1182
+ "type": "string",
1183
+ "minLength": 1
1184
+ }
1185
+ }
1186
+ }
1187
+ }
1188
+ }
1189
+ }
1190
+ },
1191
+ "warnings": {
1192
+ "type": "array",
1193
+ "items": {
1194
+ "type": "string",
1195
+ "minLength": 1
1196
+ }
1197
+ },
1198
+ "suppressed_assertions": {
1199
+ "type": "array",
1200
+ "items": {
1201
+ "$ref": "#/$defs/suppressed_assertion_trace"
1202
+ }
1203
+ }
1204
+ }
1205
+ },
1206
+ "suppressed_by": {
1207
+ "type": "object",
1208
+ "additionalProperties": false,
1209
+ "required": [
1210
+ "kind",
1211
+ "target_assertion_id",
1212
+ "reason"
1213
+ ],
1214
+ "properties": {
1215
+ "kind": {
1216
+ "type": "string",
1217
+ "enum": [
1218
+ "predicate_redirect"
1219
+ ]
1220
+ },
1221
+ "target_assertion_id": {
1222
+ "type": "string",
1223
+ "minLength": 1
1224
+ },
1225
+ "reason": {
1226
+ "type": "string",
1227
+ "enum": [
1228
+ "predicate_upgraded_to_lexical",
1229
+ "modality_moved_to_lexical",
1230
+ "role_carrier_suppressed",
1231
+ "role_carrier_suppressed_v2_nominal",
1232
+ "copula_bucket_sink_suppressed"
1233
+ ]
1234
+ },
1235
+ "evidence": {
1236
+ "type": "object",
1237
+ "additionalProperties": false,
1238
+ "properties": {
1239
+ "upstream_relation_ids": {
1240
+ "type": "array",
1241
+ "items": {
1242
+ "type": "string",
1243
+ "minLength": 1
1244
+ }
1245
+ },
1246
+ "token_ids": {
1247
+ "type": "array",
1248
+ "items": {
1249
+ "type": "string",
1250
+ "minLength": 1
1251
+ }
1252
+ }
1253
+ }
1254
+ }
1255
+ }
1256
+ },
1257
+ "suppressed_assertion_trace": {
1258
+ "type": "object",
1259
+ "additionalProperties": false,
1260
+ "required": [
1261
+ "id",
1262
+ "segment_id",
1263
+ "predicate",
1264
+ "diagnostics"
1265
+ ],
1266
+ "properties": {
1267
+ "id": {
1268
+ "type": "string",
1269
+ "minLength": 1
1270
+ },
1271
+ "suppressed_assertion_id": {
1272
+ "type": "string",
1273
+ "minLength": 1
1274
+ },
1275
+ "host_assertion_id": {
1276
+ "type": "string",
1277
+ "minLength": 1
1278
+ },
1279
+ "reason": {
1280
+ "type": "string",
1281
+ "enum": [
1282
+ "role_carrier_suppressed",
1283
+ "role_carrier_suppressed_v2_nominal",
1284
+ "copula_bucket_sink_suppressed"
1285
+ ]
1286
+ },
1287
+ "predicate_class": {
1288
+ "type": "string",
1289
+ "enum": [
1290
+ "lexical_verb",
1291
+ "copula",
1292
+ "auxiliary",
1293
+ "preposition",
1294
+ "nominal_head"
1295
+ ]
1296
+ },
1297
+ "transferred_buckets": {
1298
+ "type": "array",
1299
+ "items": {
1300
+ "type": "string",
1301
+ "anyOf": [
1302
+ {
1303
+ "enum": [
1304
+ "actor",
1305
+ "theme",
1306
+ "attr",
1307
+ "topic",
1308
+ "location",
1309
+ "other"
1310
+ ]
1311
+ },
1312
+ {
1313
+ "pattern": "^operator:[a-z_]+$"
1314
+ }
1315
+ ]
1316
+ }
1317
+ },
1318
+ "transferred_mention_ids": {
1319
+ "type": "array",
1320
+ "items": {
1321
+ "type": "string",
1322
+ "minLength": 1
1323
+ }
1324
+ },
1325
+ "evidence": {
1326
+ "type": "object",
1327
+ "additionalProperties": false,
1328
+ "properties": {
1329
+ "token_ids": {
1330
+ "type": "array",
1331
+ "items": {
1332
+ "type": "string",
1333
+ "minLength": 1
1334
+ }
1335
+ }
1336
+ }
1337
+ },
1338
+ "segment_id": {
1339
+ "type": "string",
1340
+ "minLength": 1
1341
+ },
1342
+ "predicate": {
1343
+ "type": "object",
1344
+ "additionalProperties": false,
1345
+ "required": [
1346
+ "mention_id",
1347
+ "head_token_id"
1348
+ ],
1349
+ "properties": {
1350
+ "mention_id": {
1351
+ "type": "string",
1352
+ "minLength": 1
1353
+ },
1354
+ "head_token_id": {
1355
+ "type": "string",
1356
+ "minLength": 1
1357
+ }
1358
+ }
1359
+ },
1360
+ "diagnostics": {
1361
+ "type": "object",
1362
+ "additionalProperties": false,
1363
+ "required": [
1364
+ "suppressed_by"
1365
+ ],
1366
+ "properties": {
1367
+ "predicate_quality": {
1368
+ "type": "string",
1369
+ "enum": [
1370
+ "ok",
1371
+ "low"
1372
+ ]
1373
+ },
1374
+ "suppressed_by": {
1375
+ "$ref": "#/$defs/suppressed_by"
1376
+ }
1377
+ }
1378
+ }
1379
+ }
1380
+ },
1381
+ "wiki_title_evidence": {
1382
+ "type": "object",
1383
+ "additionalProperties": false,
1384
+ "required": [
1385
+ "normalization",
1386
+ "mention_matches",
1387
+ "assertion_predicate_matches"
1388
+ ],
1389
+ "properties": {
1390
+ "normalization": {
1391
+ "type": "object",
1392
+ "additionalProperties": false,
1393
+ "required": [
1394
+ "unicode_form",
1395
+ "punctuation_map",
1396
+ "whitespace",
1397
+ "casefold"
1398
+ ],
1399
+ "properties": {
1400
+ "unicode_form": {
1401
+ "type": "string",
1402
+ "minLength": 1
1403
+ },
1404
+ "punctuation_map": {
1405
+ "type": "object",
1406
+ "additionalProperties": true
1407
+ },
1408
+ "whitespace": {
1409
+ "type": "string",
1410
+ "minLength": 1
1411
+ },
1412
+ "casefold": {
1413
+ "type": "string",
1414
+ "minLength": 1
1415
+ }
1416
+ }
1417
+ },
1418
+ "mention_matches": {
1419
+ "type": "array",
1420
+ "items": {
1421
+ "$ref": "#/$defs/wiki_mention_match"
1422
+ }
1423
+ },
1424
+ "assertion_predicate_matches": {
1425
+ "type": "array",
1426
+ "items": {
1427
+ "$ref": "#/$defs/wiki_assertion_match"
1428
+ }
1429
+ }
1430
+ }
1431
+ },
1432
+ "wiki_mention_match": {
1433
+ "type": "object",
1434
+ "additionalProperties": false,
1435
+ "required": [
1436
+ "mention_id",
1437
+ "normalized_surface",
1438
+ "exact_titles",
1439
+ "prefix_titles"
1440
+ ],
1441
+ "properties": {
1442
+ "mention_id": {
1443
+ "type": "string",
1444
+ "minLength": 1
1445
+ },
1446
+ "normalized_surface": {
1447
+ "type": "string"
1448
+ },
1449
+ "exact_titles": {
1450
+ "type": "array",
1451
+ "items": {
1452
+ "type": "string",
1453
+ "minLength": 1
1454
+ }
1455
+ },
1456
+ "prefix_titles": {
1457
+ "type": "array",
1458
+ "items": {
1459
+ "type": "string",
1460
+ "minLength": 1
1461
+ }
1462
+ }
1463
+ }
1464
+ },
1465
+ "wiki_assertion_match": {
1466
+ "type": "object",
1467
+ "additionalProperties": false,
1468
+ "required": [
1469
+ "assertion_id",
1470
+ "predicate_mention_id",
1471
+ "exact_titles",
1472
+ "prefix_titles"
1473
+ ],
1474
+ "properties": {
1475
+ "assertion_id": {
1476
+ "type": "string",
1477
+ "minLength": 1
1478
+ },
1479
+ "predicate_mention_id": {
1480
+ "type": "string",
1481
+ "minLength": 1
1482
+ },
1483
+ "exact_titles": {
1484
+ "type": "array",
1485
+ "items": {
1486
+ "type": "string",
1487
+ "minLength": 1
1488
+ }
1489
+ },
1490
+ "prefix_titles": {
1491
+ "type": "array",
1492
+ "items": {
1493
+ "type": "string",
1494
+ "minLength": 1
1495
+ }
1496
+ }
1497
+ }
1498
+ },
1499
+ "relation_evidence_item": {
1500
+ "type": "object",
1501
+ "additionalProperties": false,
1502
+ "required": [
1503
+ "from_token_id",
1504
+ "to_token_id",
1505
+ "label",
1506
+ "annotation_id"
1507
+ ],
1508
+ "properties": {
1509
+ "annotation_id": {
1510
+ "type": "string",
1511
+ "minLength": 1
1512
+ },
1513
+ "from_token_id": {
1514
+ "type": "string",
1515
+ "minLength": 1
1516
+ },
1517
+ "to_token_id": {
1518
+ "type": "string",
1519
+ "minLength": 1
1520
+ },
1521
+ "label": {
1522
+ "type": "string",
1523
+ "minLength": 1
1524
+ }
1525
+ }
1526
+ },
1527
+ "unresolved_item": {
1528
+ "type": "object",
1529
+ "additionalProperties": false,
1530
+ "required": [
1531
+ "kind",
1532
+ "segment_id",
1533
+ "mention_id",
1534
+ "reason",
1535
+ "evidence"
1536
+ ],
1537
+ "properties": {
1538
+ "kind": {
1539
+ "type": "string",
1540
+ "enum": [
1541
+ "unresolved_attachment",
1542
+ "unresolved_head"
1543
+ ]
1544
+ },
1545
+ "segment_id": {
1546
+ "type": "string",
1547
+ "minLength": 1
1548
+ },
1549
+ "mention_id": {
1550
+ "type": "string",
1551
+ "minLength": 1
1552
+ },
1553
+ "mention_ids": {
1554
+ "type": "array",
1555
+ "items": {
1556
+ "type": "string",
1557
+ "minLength": 1
1558
+ }
1559
+ },
1560
+ "reason": {
1561
+ "type": "string",
1562
+ "enum": [
1563
+ "missing_relation",
1564
+ "projection_failed",
1565
+ "predicate_invalid",
1566
+ "operator_scope_open",
1567
+ "coord_type_missing"
1568
+ ]
1569
+ },
1570
+ "evidence": {
1571
+ "type": "object",
1572
+ "additionalProperties": false,
1573
+ "required": [
1574
+ "token_ids"
1575
+ ],
1576
+ "properties": {
1577
+ "token_ids": {
1578
+ "type": "array",
1579
+ "minItems": 1,
1580
+ "items": {
1581
+ "type": "string",
1582
+ "minLength": 1
1583
+ }
1584
+ },
1585
+ "upstream_relation_ids": {
1586
+ "type": "array",
1587
+ "items": {
1588
+ "type": "string",
1589
+ "minLength": 1
1590
+ }
1591
+ },
1592
+ "span": {
1593
+ "$ref": "#/$defs/span"
1594
+ }
1595
+ }
1596
+ }
1597
+ }
1598
+ },
1599
+ "source_input": {
1600
+ "type": "object",
1601
+ "additionalProperties": false,
1602
+ "required": [
1603
+ "artifact",
1604
+ "digest"
1605
+ ],
1606
+ "properties": {
1607
+ "artifact": {
1608
+ "type": "string",
1609
+ "enum": [
1610
+ "seed.txt",
1611
+ "seed.text.in_memory",
1612
+ "relations_extracted.in_memory",
1613
+ "seed.relations.yaml",
1614
+ "seed.mwe.materialized.yaml",
1615
+ "seed.chunks.yaml",
1616
+ "seed.heads.yaml",
1617
+ "seed.tokens.yaml",
1618
+ "seed.pos.yaml"
1619
+ ]
1620
+ },
1621
+ "digest": {
1622
+ "type": "string",
1623
+ "minLength": 1
1624
+ },
1625
+ "origin": {
1626
+ "$ref": "#/$defs/source_input_origin"
1627
+ }
1628
+ }
1629
+ },
1630
+ "source_input_origin": {
1631
+ "type": "object",
1632
+ "additionalProperties": false,
1633
+ "required": [
1634
+ "kind"
1635
+ ],
1636
+ "properties": {
1637
+ "kind": {
1638
+ "type": "string",
1639
+ "enum": [
1640
+ "in_memory",
1641
+ "file"
1642
+ ]
1643
+ },
1644
+ "path": {
1645
+ "type": "string",
1646
+ "minLength": 1
1647
+ },
1648
+ "mtime_ms": {
1649
+ "type": "integer",
1650
+ "minimum": 0
1651
+ },
1652
+ "read_at": {
1653
+ "type": "string",
1654
+ "minLength": 1
1655
+ }
1656
+ },
1657
+ "allOf": [
1658
+ {
1659
+ "if": {
1660
+ "properties": {
1661
+ "kind": {
1662
+ "const": "file"
1663
+ }
1664
+ }
1665
+ },
1666
+ "then": {
1667
+ "required": [
1668
+ "path",
1669
+ "mtime_ms"
1670
+ ]
1671
+ }
1672
+ }
1673
+ ]
1674
+ },
1675
+ "source_pipeline": {
1676
+ "type": "object",
1677
+ "additionalProperties": false,
1678
+ "required": [
1679
+ "target",
1680
+ "relations_extracted_digest",
1681
+ "token_count",
1682
+ "annotation_count",
1683
+ "wikipedia_title_index_configured"
1684
+ ],
1685
+ "properties": {
1686
+ "target": {
1687
+ "type": "string",
1688
+ "minLength": 1
1689
+ },
1690
+ "relations_extracted_digest": {
1691
+ "type": "string",
1692
+ "minLength": 1
1693
+ },
1694
+ "token_count": {
1695
+ "type": "integer",
1696
+ "minimum": 0
1697
+ },
1698
+ "annotation_count": {
1699
+ "type": "integer",
1700
+ "minimum": 0
1701
+ },
1702
+ "wikipedia_title_index_configured": {
1703
+ "type": "boolean"
1704
+ }
1705
+ }
1706
+ },
1707
+ "role_entry": {
1708
+ "type": "object",
1709
+ "additionalProperties": false,
1710
+ "required": [
1711
+ "role",
1712
+ "mention_ids",
1713
+ "evidence"
1714
+ ],
1715
+ "properties": {
1716
+ "role": {
1717
+ "type": "string",
1718
+ "minLength": 1
1719
+ },
1720
+ "mention_ids": {
1721
+ "$ref": "#/$defs/mention_id_list"
1722
+ },
1723
+ "evidence": {
1724
+ "type": "object",
1725
+ "additionalProperties": false,
1726
+ "required": [
1727
+ "relation_ids",
1728
+ "token_ids"
1729
+ ],
1730
+ "properties": {
1731
+ "relation_ids": {
1732
+ "type": "array",
1733
+ "items": {
1734
+ "type": "string",
1735
+ "minLength": 1
1736
+ }
1737
+ },
1738
+ "token_ids": {
1739
+ "type": "array",
1740
+ "items": {
1741
+ "type": "string",
1742
+ "minLength": 1
1743
+ }
1744
+ }
1745
+ }
1746
+ }
1747
+ }
1748
+ }
1749
+ }
1750
+ }
1751
+