elementary-assertions 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +353 -0
- package/LICENSE +21 -0
- package/README.md +211 -0
- package/bin/elementary-assertions.js +8 -0
- package/docs/DEV_TOOLING.md +98 -0
- package/docs/NPM_RELEASE.md +177 -0
- package/docs/OPERATIONAL.md +159 -0
- package/docs/RELEASE_NOTES_TEMPLATE.md +37 -0
- package/docs/REPO_WORKFLOWS.md +48 -0
- package/package.json +46 -0
- package/src/core/accepted-annotations.js +44 -0
- package/src/core/assertions.js +2304 -0
- package/src/core/determinism.js +95 -0
- package/src/core/diagnostics.js +496 -0
- package/src/core/ids.js +9 -0
- package/src/core/mention-builder.js +272 -0
- package/src/core/mention-evidence.js +52 -0
- package/src/core/mention-head-resolution.js +108 -0
- package/src/core/mention-materialization.js +31 -0
- package/src/core/mentions.js +149 -0
- package/src/core/output.js +296 -0
- package/src/core/projection.js +192 -0
- package/src/core/roles.js +164 -0
- package/src/core/strings.js +7 -0
- package/src/core/tokens.js +53 -0
- package/src/core/upstream.js +31 -0
- package/src/index.js +6 -0
- package/src/render/index.js +5 -0
- package/src/render/layouts/compact.js +10 -0
- package/src/render/layouts/meaning.js +7 -0
- package/src/render/layouts/readable.js +7 -0
- package/src/render/layouts/table.js +7 -0
- package/src/render/render.js +931 -0
- package/src/run.js +278 -0
- package/src/schema/seed.elementary-assertions.schema.json +1751 -0
- package/src/tools/cli.js +158 -0
- package/src/tools/index.js +6 -0
- package/src/tools/io.js +55 -0
- package/src/validate/ajv.js +20 -0
- package/src/validate/coverage.js +215 -0
- package/src/validate/determinism.js +115 -0
- package/src/validate/diagnostics-strict.js +392 -0
- package/src/validate/errors.js +19 -0
- package/src/validate/index.js +20 -0
- package/src/validate/integrity.js +41 -0
- package/src/validate/invariants.js +157 -0
- package/src/validate/references.js +110 -0
- package/src/validate/schema.js +50 -0
|
@@ -0,0 +1,392 @@
|
|
|
1
|
+
const { failValidation } = require("./errors");
|
|
2
|
+
/**
 * Checks that `values` is in non-descending order when every element is
 * converted with `String()` and neighbours are compared with `localeCompare`.
 *
 * @param {Array<*>} values - Sequence to check; adjacent pairs are compared.
 * @param {string} code - Error code forwarded to `failValidation`.
 * @param {string} message - Error message forwarded to `failValidation`.
 * @returns {void}
 */
function assertSortedStrings(values, code, message) {
  let position = 0;
  while (position + 1 < values.length) {
    const left = String(values[position]);
    const right = String(values[position + 1]);
    // Equal neighbours are accepted; only a strictly descending pair is reported.
    if (left.localeCompare(right) > 0) {
      failValidation(code, message);
    }
    position += 1;
  }
}
|
|
9
|
+
|
|
10
|
+
/**
 * Indexes `doc.assertions` by id.
 *
 * Entries whose `id` is not a non-empty string are left out. When two entries
 * share an id, the later entry replaces the earlier one.
 *
 * @param {object} doc - Document that may carry an `assertions` array.
 * @returns {Map<string, object>} Assertion lookup keyed by id.
 */
function buildAssertionMap(doc) {
  const byId = new Map();
  (doc.assertions || []).forEach((assertion) => {
    const id = assertion && assertion.id;
    if (typeof id === "string" && id.length > 0) {
      byId.set(id, assertion);
    }
  });
  return byId;
}
|
|
13
|
+
|
|
14
|
+
/**
 * Indexes `doc.mentions` by id.
 *
 * Entries whose `id` is not a non-empty string are left out. When two entries
 * share an id, the later entry replaces the earlier one.
 *
 * @param {object} doc - Document that may carry a `mentions` array.
 * @returns {Map<string, object>} Mention lookup keyed by id.
 */
function buildMentionMap(doc) {
  const byId = new Map();
  (doc.mentions || []).forEach((mention) => {
    const id = mention && mention.id;
    if (typeof id === "string" && id.length > 0) {
      byId.set(id, mention);
    }
  });
  return byId;
}
|
|
17
|
+
|
|
18
|
+
/**
 * Indexes `doc.tokens` by id.
 *
 * Entries whose `id` is not a non-empty string are left out. When two entries
 * share an id, the later entry replaces the earlier one.
 *
 * @param {object} doc - Document that may carry a `tokens` array.
 * @returns {Map<string, object>} Token lookup keyed by id.
 */
function buildTokenMap(doc) {
  const byId = new Map();
  (doc.tokens || []).forEach((token) => {
    const id = token && token.id;
    if (typeof id === "string" && id.length > 0) {
      byId.set(id, token);
    }
  });
  return byId;
}
|
|
21
|
+
|
|
22
|
+
/**
 * Gathers every mention id listed under an assertion's `arguments` and
 * `modifiers` entries.
 *
 * @param {object|null|undefined} assertion - Assertion to scan; a missing
 *   assertion or missing lists contribute nothing.
 * @returns {Set<*>} Mention ids in first-seen order (arguments before modifiers).
 */
function collectAssertionMentionRefs(assertion) {
  const refs = new Set();
  const entryLists = [
    (assertion && assertion.arguments) || [],
    (assertion && assertion.modifiers) || [],
  ];
  for (const entries of entryLists) {
    for (const entry of entries) {
      const mentionIds = (entry && entry.mention_ids) || [];
      for (const mentionId of mentionIds) {
        refs.add(mentionId);
      }
    }
  }
  return refs;
}
|
|
32
|
+
|
|
33
|
+
/**
 * Strict checks for each assertion's `diagnostics.suppression_eligibility`.
 *
 * Assertions without an object-valued `suppression_eligibility` are skipped.
 * For the others, checks run in this order, each reporting through
 * `failValidation` with code `EA_VALIDATE_STRICT_SUPPRESSION_ELIGIBILITY`:
 *   1. `eligible` and `failure_reason` agree (truthy `eligible` requires a null
 *      reason, falsy `eligible` requires a non-null one);
 *   2. `segment_id` equals the assertion's `segment_id`;
 *   3. `assertion_id` equals the assertion's id;
 *   4. `chosen_host_assertion_id` is null or a key of `assertionById`;
 *   5. the three token-id lists are sorted (non-array values count as empty);
 *   6. `failure_reason === "no_host"` comes with a null host id.
 *
 * @param {Iterable<object>|null|undefined} assertions - Assertions to inspect.
 * @param {Map<string, object>} assertionById - Lookup of known assertions.
 * @returns {void}
 */
function validateSuppressionEligibility(assertions, assertionById) {
  const errorCode = "EA_VALIDATE_STRICT_SUPPRESSION_ELIGIBILITY";
  const sortedListKeys = ["source_non_operator_token_ids", "chosen_host_token_ids", "missing_in_host_token_ids"];
  const reject = (message) => failValidation(errorCode, message);

  for (const assertion of assertions || []) {
    const assertionId = String((assertion && assertion.id) || "<unknown>");
    const diagnostics = (assertion || {}).diagnostics || {};
    const eligibility = diagnostics.suppression_eligibility;
    if (!eligibility || typeof eligibility !== "object") {
      continue;
    }

    const failureReason = eligibility.failure_reason;
    const reasonIsNull = failureReason === null;
    if (eligibility.eligible) {
      if (!reasonIsNull) {
        reject(`Strict diagnostics error: assertion ${assertionId} has eligible=true with non-null failure_reason.`);
      }
    } else if (reasonIsNull) {
      reject(`Strict diagnostics error: assertion ${assertionId} has eligible=false with null failure_reason.`);
    }

    if (assertion.segment_id !== eligibility.segment_id) {
      reject(`Strict diagnostics error: assertion ${assertionId} suppression_eligibility.segment_id must match assertion.segment_id.`);
    }
    if (assertionId !== eligibility.assertion_id) {
      reject(`Strict diagnostics error: assertion ${assertionId} suppression_eligibility.assertion_id must match assertion.id.`);
    }

    const hostAssertionId = eligibility.chosen_host_assertion_id;
    const hostIsNull = hostAssertionId === null;
    const hostIsKnown = typeof hostAssertionId === "string" && assertionById.has(hostAssertionId);
    if (!hostIsNull && !hostIsKnown) {
      reject(`Strict diagnostics error: assertion ${assertionId} suppression_eligibility.chosen_host_assertion_id must reference an existing assertion or be null.`);
    }

    sortedListKeys.forEach((key) => {
      const list = Array.isArray(eligibility[key]) ? eligibility[key] : [];
      assertSortedStrings(
        list,
        errorCode,
        `Strict diagnostics error: assertion ${assertionId} suppression_eligibility.${key} must be sorted.`
      );
    });

    if (!hostIsNull && failureReason === "no_host") {
      reject(`Strict diagnostics error: assertion ${assertionId} with failure_reason=no_host must not set chosen_host_assertion_id.`);
    }
  }
}
|
|
87
|
+
|
|
88
|
+
/**
 * Strict checks for `doc.diagnostics.coordination_groups`.
 *
 * A non-array value is treated as an empty list. For each group, in order:
 * the group id must not sort before the previous group's id, the group's
 * `member_assertion_ids` must be sorted, and every member id must be a key of
 * `assertionById`. Violations are reported through `failValidation`.
 *
 * @param {object|null|undefined} doc - Document carrying the diagnostics.
 * @param {Map<string, object>} assertionById - Lookup of known assertions.
 * @returns {void}
 */
function validateCoordinationGroups(doc, assertionById) {
  const diagnostics = (doc || {}).diagnostics || {};
  const groups = Array.isArray(diagnostics.coordination_groups) ? diagnostics.coordination_groups : [];

  // An empty previous id (initial state or a group without id) never triggers the order check.
  let lastGroupId = "";
  for (const group of groups) {
    const groupId = String((group && group.id) || "");
    if (lastGroupId !== "" && lastGroupId.localeCompare(groupId) > 0) {
      failValidation(
        "EA_VALIDATE_STRICT_COORDINATION_ORDER",
        "Strict diagnostics error: diagnostics.coordination_groups must be sorted by id."
      );
    }
    lastGroupId = groupId;

    const members = Array.isArray(group && group.member_assertion_ids) ? group.member_assertion_ids : [];
    assertSortedStrings(
      members,
      "EA_VALIDATE_STRICT_COORDINATION_MEMBER_ORDER",
      "Strict diagnostics error: diagnostics.coordination_groups[*].member_assertion_ids must be sorted."
    );
    for (const memberId of members) {
      if (assertionById.has(memberId)) {
        continue;
      }
      failValidation(
        "EA_VALIDATE_STRICT_COORDINATION_REFERENCE",
        `Strict diagnostics error: coordination group ${groupId} references unknown assertion ${memberId}.`
      );
    }
  }
}
|
|
119
|
+
|
|
120
|
+
/**
 * Strict checks for `doc.diagnostics.subject_role_gaps`.
 *
 * A non-array value is treated as an empty list. For each gap, in order:
 *   1. `assertion_id`, `predicate_mention_id` and `predicate_head_token_id`
 *      must be keys of the corresponding lookup maps;
 *   2. the composite key `segment_id|assertion_id|predicate_mention_id` must
 *      not sort before the previous gap's key;
 *   3. `evidence.token_ids` and `evidence.upstream_relation_ids` must be sorted;
 *   4. the referenced assertion must list no mention ids under `actor` arguments.
 * Violations are reported through `failValidation`.
 *
 * @param {object|null|undefined} doc - Document carrying the diagnostics.
 * @param {Map<string, object>} assertionById - Lookup of known assertions.
 * @param {Map<string, object>} mentionById - Lookup of known mentions.
 * @param {Map<string, object>} tokenById - Lookup of known tokens.
 * @returns {void}
 */
function validateSubjectRoleGaps(doc, assertionById, mentionById, tokenById) {
  const referenceCode = "EA_VALIDATE_STRICT_SUBJECT_GAP_REFERENCE";
  const evidenceOrderCode = "EA_VALIDATE_STRICT_SUBJECT_GAP_EVIDENCE_ORDER";
  const diagnostics = (doc || {}).diagnostics || {};
  const gaps = Array.isArray(diagnostics.subject_role_gaps) ? diagnostics.subject_role_gaps : [];

  let lastSortKey = null;
  for (const gap of gaps) {
    const record = gap || {};
    const assertionId = String(record.assertion_id || "");
    const mentionId = String(record.predicate_mention_id || "");
    const headTokenId = String(record.predicate_head_token_id || "");
    const segmentId = String(record.segment_id || "");

    if (!assertionById.has(assertionId)) {
      failValidation(referenceCode, `Strict diagnostics error: subject_role_gaps references unknown assertion ${assertionId}.`);
    }
    if (!mentionById.has(mentionId)) {
      failValidation(referenceCode, `Strict diagnostics error: subject_role_gaps references unknown mention ${mentionId}.`);
    }
    if (!tokenById.has(headTokenId)) {
      failValidation(referenceCode, `Strict diagnostics error: subject_role_gaps references unknown head token ${headTokenId}.`);
    }

    const sortKey = `${segmentId}|${assertionId}|${mentionId}`;
    if (lastSortKey !== null && lastSortKey.localeCompare(sortKey) > 0) {
      failValidation(
        "EA_VALIDATE_STRICT_SUBJECT_GAP_ORDER",
        "Strict diagnostics error: diagnostics.subject_role_gaps must be sorted by segment_id/assertion_id/predicate_mention_id."
      );
    }
    lastSortKey = sortKey;

    const evidence = record.evidence && typeof record.evidence === "object" ? record.evidence : {};
    assertSortedStrings(
      Array.isArray(evidence.token_ids) ? evidence.token_ids : [],
      evidenceOrderCode,
      "Strict diagnostics error: diagnostics.subject_role_gaps[*].evidence.token_ids must be sorted."
    );
    assertSortedStrings(
      Array.isArray(evidence.upstream_relation_ids) ? evidence.upstream_relation_ids : [],
      evidenceOrderCode,
      "Strict diagnostics error: diagnostics.subject_role_gaps[*].evidence.upstream_relation_ids must be sorted."
    );

    // Tally the mention ids attached to "actor" arguments of the referenced assertion.
    const referenced = assertionById.get(assertionId);
    let actorMentionCount = 0;
    ((referenced && referenced.arguments) || []).forEach((entry) => {
      if (String((entry && entry.role) || "") !== "actor") {
        return;
      }
      actorMentionCount = actorMentionCount + ((entry && entry.mention_ids) || []).length;
    });
    if (actorMentionCount > 0) {
      failValidation(
        "EA_VALIDATE_STRICT_SUBJECT_GAP_ACTOR_CONSISTENCY",
        `Strict diagnostics error: subject_role_gap assertion ${assertionId} must not contain actor role entries.`
      );
    }
  }
}
|
|
186
|
+
|
|
187
|
+
/**
 * Strict check that `doc.diagnostics.fragmentation.per_segment` is ordered by
 * `segment_id`.
 *
 * Nothing is checked when `fragmentation` is missing or not an object, or when
 * `per_segment` is not an array. A row without `segment_id` is compared as "".
 *
 * @param {object|null|undefined} doc - Document carrying the diagnostics.
 * @returns {void}
 */
function validateFragmentation(doc) {
  const diagnostics = (doc || {}).diagnostics || {};
  const fragmentation = diagnostics.fragmentation;
  if (!fragmentation || typeof fragmentation !== "object") {
    return;
  }
  if (!Array.isArray(fragmentation.per_segment)) {
    return;
  }

  // An empty previous id never triggers the order check.
  let lastSegmentId = "";
  for (const row of fragmentation.per_segment) {
    const currentSegmentId = String((row && row.segment_id) || "");
    if (lastSegmentId !== "" && lastSegmentId.localeCompare(currentSegmentId) > 0) {
      failValidation(
        "EA_VALIDATE_STRICT_FRAGMENTATION_ORDER",
        "Strict diagnostics error: diagnostics.fragmentation.per_segment must be sorted by segment_id."
      );
    }
    lastSegmentId = currentSegmentId;
  }
}
|
|
203
|
+
|
|
204
|
+
/**
 * Strict check that, when `doc.diagnostics.gap_signals` is an object, each of
 * `coordination_type_missing`, `comparative_gap` and `quantifier_scope_gap`
 * is a boolean. A missing or non-object `gap_signals` is not checked.
 *
 * @param {object|null|undefined} doc - Document carrying the diagnostics.
 * @returns {void}
 */
function validateGapSignals(doc) {
  const diagnostics = (doc || {}).diagnostics || {};
  const gapSignals = diagnostics.gap_signals;
  if (!gapSignals || typeof gapSignals !== "object") {
    return;
  }

  const requiredFlags = ["coordination_type_missing", "comparative_gap", "quantifier_scope_gap"];
  requiredFlags.forEach((flag) => {
    const isBoolean = typeof gapSignals[flag] === "boolean";
    if (isBoolean) {
      return;
    }
    failValidation(
      "EA_VALIDATE_STRICT_GAP_SIGNALS",
      `Strict diagnostics error: diagnostics.gap_signals.${flag} must be boolean when gap_signals is present.`
    );
  });
}
|
|
216
|
+
|
|
217
|
+
/**
 * Strict checks for `doc.diagnostics.suppressed_assertions`.
 *
 * A non-array value is treated as an empty list. Two passes are made so that
 * every list-level problem is reported before any per-item semantic problem:
 *
 * Pass 1 - ids must be in sorted order and must not repeat.
 *
 * Pass 2 - for each item, in order:
 *   1. optional top-level `suppressed_assertion_id`, `host_assertion_id` and
 *      `reason` (checked only when present as own properties) must agree with
 *      the item id and with `diagnostics.suppressed_by`;
 *   2. `suppressed_by.evidence.token_ids`, `evidence.token_ids`,
 *      `transferred_buckets` and `transferred_mention_ids` must be sorted;
 *   3. for the reasons in `reasonsRequiringHeadEvidence`, the suppressed_by
 *      token evidence must hold at least two ids and include both the item's
 *      and the target assertion's predicate head token id;
 *   4. `role_carrier_suppressed_v2_nominal` requires `predicate_class`
 *      `nominal_head`; `copula_bucket_sink_suppressed` requires `copula` or
 *      `auxiliary`;
 *   5. for those two reasons, both transfer lists must be arrays and every
 *      transferred mention id must be referenced by the target assertion.
 *
 * Violations are reported through `failValidation`.
 *
 * @param {object|null|undefined} doc - Document carrying the diagnostics.
 * @param {Map<string, object>} assertionById - Lookup of known assertions.
 * @returns {void}
 */
function validateSuppressedAssertionsStrict(doc, assertionById) {
  const semanticsCode = "EA_VALIDATE_STRICT_SUPPRESSED_SEMANTICS";
  // Built once per call; it does not depend on the item being inspected.
  const reasonsRequiringHeadEvidence = new Set([
    "modality_moved_to_lexical",
    "role_carrier_suppressed",
    "role_carrier_suppressed_v2_nominal",
    "copula_bucket_sink_suppressed",
  ]);
  const asObject = (value) => (value && typeof value === "object" ? value : {});
  const asArray = (value) => (Array.isArray(value) ? value : []);
  const hasOwn = (target, key) => Object.prototype.hasOwnProperty.call(target || {}, key);

  const suppressed = asArray((((doc || {}).diagnostics) || {}).suppressed_assertions);

  // Pass 1: list-level ordering and uniqueness.
  let prevSuppressedId = null;
  const seenSuppressedIds = new Set();
  for (const item of suppressed) {
    const suppressedId = String((item && item.id) || "");
    if (prevSuppressedId !== null && prevSuppressedId.localeCompare(suppressedId) > 0) {
      failValidation(
        "EA_VALIDATE_STRICT_SUPPRESSED_LIST_ORDER",
        "Strict diagnostics error: diagnostics.suppressed_assertions must be sorted by id."
      );
    }
    if (seenSuppressedIds.has(suppressedId)) {
      failValidation(
        "EA_VALIDATE_STRICT_SUPPRESSED_LIST_DUPLICATE",
        "Strict diagnostics error: diagnostics.suppressed_assertions must not contain duplicate ids."
      );
    }
    seenSuppressedIds.add(suppressedId);
    prevSuppressedId = suppressedId;
  }

  // Pass 2: per-item semantics.
  for (const item of suppressed) {
    const suppressedId = String((item && item.id) || "");
    const predicate = asObject(item && item.predicate);
    const diagnostics = asObject(item && item.diagnostics);
    const suppressedBy = asObject(diagnostics.suppressed_by);
    const targetAssertionId = String(suppressedBy.target_assertion_id || "");
    const targetAssertion = assertionById.get(targetAssertionId);
    // Falls back to "" when the target assertion or its predicate is missing.
    const targetPredicateHeadTokenId = String(((((targetAssertion || {}).predicate) || {}).head_token_id) || "");
    const sourcePredicateHeadTokenId = String(predicate.head_token_id || "");
    const reason = String(suppressedBy.reason || "");

    if (hasOwn(item, "suppressed_assertion_id") && item.suppressed_assertion_id !== suppressedId) {
      failValidation(
        semanticsCode,
        `Strict diagnostics error: suppressed assertion ${suppressedId} has mismatching suppressed_assertion_id.`
      );
    }
    if (hasOwn(item, "host_assertion_id") && item.host_assertion_id !== targetAssertionId) {
      failValidation(
        semanticsCode,
        `Strict diagnostics error: suppressed assertion ${suppressedId} host_assertion_id must match diagnostics.suppressed_by.target_assertion_id.`
      );
    }
    if (hasOwn(item, "reason") && item.reason !== reason) {
      failValidation(
        semanticsCode,
        `Strict diagnostics error: suppressed assertion ${suppressedId} top-level reason must match diagnostics.suppressed_by.reason.`
      );
    }

    const suppressedByTokenIds = asArray(asObject(suppressedBy.evidence).token_ids);
    const topLevelTokenIds = asArray(asObject(item && item.evidence).token_ids);
    const transferredBuckets = asArray(item && item.transferred_buckets);
    const transferredMentionIds = asArray(item && item.transferred_mention_ids);

    assertSortedStrings(
      suppressedByTokenIds,
      semanticsCode,
      `Strict diagnostics error: suppressed assertion ${suppressedId} suppressed_by.evidence.token_ids must be sorted.`
    );
    assertSortedStrings(
      topLevelTokenIds,
      semanticsCode,
      `Strict diagnostics error: suppressed assertion ${suppressedId} evidence.token_ids must be sorted.`
    );
    assertSortedStrings(
      transferredBuckets,
      semanticsCode,
      `Strict diagnostics error: suppressed assertion ${suppressedId} transferred_buckets must be sorted.`
    );
    assertSortedStrings(
      transferredMentionIds,
      semanticsCode,
      `Strict diagnostics error: suppressed assertion ${suppressedId} transferred_mention_ids must be sorted.`
    );

    if (reasonsRequiringHeadEvidence.has(reason)) {
      if (suppressedByTokenIds.length < 2) {
        failValidation(
          semanticsCode,
          `Strict diagnostics error: suppressed assertion ${suppressedId} reason=${reason} requires token_ids evidence with source/target predicate tokens.`
        );
      }
      if (!suppressedByTokenIds.includes(sourcePredicateHeadTokenId) || !suppressedByTokenIds.includes(targetPredicateHeadTokenId)) {
        failValidation(
          semanticsCode,
          `Strict diagnostics error: suppressed assertion ${suppressedId} reason=${reason} token_ids evidence must include source and target predicate head tokens.`
        );
      }
    }

    const isNominalCarrier = reason === "role_carrier_suppressed_v2_nominal";
    const isCopulaSink = reason === "copula_bucket_sink_suppressed";

    if (isNominalCarrier && String(item && item.predicate_class) !== "nominal_head") {
      failValidation(
        semanticsCode,
        `Strict diagnostics error: suppressed assertion ${suppressedId} reason=role_carrier_suppressed_v2_nominal requires predicate_class=nominal_head.`
      );
    }

    if (isCopulaSink) {
      const predicateClass = String(item && item.predicate_class);
      if (predicateClass !== "copula" && predicateClass !== "auxiliary") {
        failValidation(
          semanticsCode,
          `Strict diagnostics error: suppressed assertion ${suppressedId} reason=copula_bucket_sink_suppressed requires predicate_class copula|auxiliary.`
        );
      }
    }

    if (isNominalCarrier || isCopulaSink) {
      // The raw fields are re-checked here: the normalized lists above hide a missing array.
      if (!Array.isArray(item && item.transferred_buckets) || !Array.isArray(item && item.transferred_mention_ids)) {
        failValidation(
          semanticsCode,
          `Strict diagnostics error: suppressed assertion ${suppressedId} reason=${reason} requires transferred_buckets and transferred_mention_ids arrays.`
        );
      }
      const hostMentionRefs = targetAssertion ? collectAssertionMentionRefs(targetAssertion) : new Set();
      for (const mentionId of transferredMentionIds) {
        if (!hostMentionRefs.has(mentionId)) {
          failValidation(
            semanticsCode,
            `Strict diagnostics error: suppressed assertion ${suppressedId} transferred mention ${mentionId} must exist in host assertion mention refs.`
          );
        }
      }
    }
  }
}
|
|
370
|
+
|
|
371
|
+
/**
 * Entry point for the strict diagnostics checks on a document.
 *
 * Builds the assertion/mention/token lookups, requires `diagnostics.warnings`
 * (treated as empty when not an array) to be sorted, then runs the individual
 * validators in a fixed order: suppression eligibility, fragmentation, gap
 * signals, coordination groups, subject role gaps, suppressed assertions.
 *
 * @param {object} doc - Document to validate.
 * @returns {void}
 */
function validateDiagnosticsStrict(doc) {
  const assertionById = buildAssertionMap(doc);
  const mentionById = buildMentionMap(doc);
  const tokenById = buildTokenMap(doc);

  const diagnostics = (doc || {}).diagnostics || {};
  const warnings = Array.isArray(diagnostics.warnings) ? diagnostics.warnings : [];
  assertSortedStrings(
    warnings,
    "EA_VALIDATE_STRICT_WARNING_ORDER",
    "Strict diagnostics error: diagnostics.warnings must be sorted."
  );

  // Order matters: it decides which problem is reported first.
  const checks = [
    () => validateSuppressionEligibility(doc.assertions || [], assertionById),
    () => validateFragmentation(doc),
    () => validateGapSignals(doc),
    () => validateCoordinationGroups(doc, assertionById),
    () => validateSubjectRoleGaps(doc, assertionById, mentionById, tokenById),
    () => validateSuppressedAssertionsStrict(doc, assertionById),
  ];
  for (const runCheck of checks) {
    runCheck();
  }
}
|
|
389
|
+
|
|
390
|
+
module.exports = {
|
|
391
|
+
validateDiagnosticsStrict,
|
|
392
|
+
};
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
/**
 * Error raised for validation failures.
 *
 * The message is formatted as `Validation error [<code>]: <message>`; the raw
 * code is kept on `code`. `details` is only set when a value other than
 * `undefined` is supplied.
 */
class ValidationError extends Error {
  /**
   * @param {string} code - Machine-readable error code.
   * @param {string} message - Human-readable description.
   * @param {*} [details] - Optional extra payload.
   */
  constructor(code, message, details) {
    const text = `Validation error [${code}]: ${message}`;
    super(text);
    Object.assign(this, { name: "ValidationError", code });
    if (typeof details !== "undefined") {
      this.details = details;
    }
  }
}
|
|
11
|
+
|
|
12
|
+
/**
 * Throws a `ValidationError` built from the given arguments; never returns.
 *
 * @param {string} code - Machine-readable error code.
 * @param {string} message - Human-readable description.
 * @param {*} [details] - Optional extra payload passed to the error.
 * @throws {ValidationError} Always.
 */
function failValidation(code, message, details) {
  const error = new ValidationError(code, message, details);
  throw error;
}
|
|
15
|
+
|
|
16
|
+
module.exports = {
|
|
17
|
+
ValidationError,
|
|
18
|
+
failValidation,
|
|
19
|
+
};
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
const { rejectLegacySlots, validateSchemaShape, validateSchemaContract } = require("./schema");
|
|
2
|
+
const { validateIntegrity } = require("./integrity");
|
|
3
|
+
const { validateDiagnosticsStrict } = require("./diagnostics-strict");
|
|
4
|
+
const { ValidationError } = require("./errors");
|
|
5
|
+
|
|
6
|
+
/**
 * Runs the validation pipeline on a document.
 *
 * The baseline steps always run, in this order: `rejectLegacySlots`,
 * `validateSchemaShape`, `validateSchemaContract`, `validateIntegrity`.
 * `validateDiagnosticsStrict` runs afterwards only when `options.strict` is
 * truthy.
 *
 * @param {object} doc - Document to validate.
 * @param {object} [options={}] - Options forwarded to the contract and
 *   integrity steps; `strict` enables the strict diagnostics step.
 * @returns {{ok: boolean}} `{ ok: true }` when no step throws.
 */
function validateElementaryAssertions(doc, options = {}) {
  const baselineSteps = [
    () => rejectLegacySlots(doc),
    () => validateSchemaShape(doc),
    () => validateSchemaContract(doc, options),
    () => validateIntegrity(doc, options),
  ];
  baselineSteps.forEach((runStep) => runStep());

  const strictRequested = Boolean(options && options.strict);
  if (strictRequested) {
    validateDiagnosticsStrict(doc);
  }

  const outcome = { ok: true };
  return outcome;
}
|
|
16
|
+
|
|
17
|
+
module.exports = {
|
|
18
|
+
validateElementaryAssertions,
|
|
19
|
+
ValidationError,
|
|
20
|
+
};
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
const { validateAssertionDeterminism } = require("./determinism");
|
|
2
|
+
const { validateCoverage } = require("./coverage");
|
|
3
|
+
const {
|
|
4
|
+
buildSegmentMap,
|
|
5
|
+
validateTokenSegmentAlignment,
|
|
6
|
+
validateMentionSegmentAlignment,
|
|
7
|
+
validateAssertionCrossFieldAlignment,
|
|
8
|
+
} = require("./invariants");
|
|
9
|
+
const {
|
|
10
|
+
ensureUniqueIds,
|
|
11
|
+
buildReferenceMaps,
|
|
12
|
+
validateMentionReferences,
|
|
13
|
+
validateAssertionReferences,
|
|
14
|
+
validateSuppressedReferences,
|
|
15
|
+
} = require("./references");
|
|
16
|
+
|
|
17
|
+
/**
 * Runs the integrity checks on a document in a fixed order.
 *
 * Sequence: `ensureUniqueIds` for tokens, mentions and assertions; build the
 * segment and reference lookups; token/segment alignment; mention references;
 * mention/segment alignment; assertion cross-field alignment; per-assertion
 * reference and determinism checks; suppressed references; coverage.
 *
 * @param {object} doc - Document to validate.
 * @param {object} [options={}] - Options forwarded to `validateCoverage`.
 * @returns {void}
 */
function validateIntegrity(doc, options = {}) {
  const collections = [
    ["token", doc.tokens],
    ["mention", doc.mentions],
    ["assertion", doc.assertions],
  ];
  for (const [kind, items] of collections) {
    ensureUniqueIds(items, kind);
  }

  const segmentById = buildSegmentMap(doc);
  const lookups = buildReferenceMaps(doc);
  const tokenById = lookups.tokenById;
  const mentionById = lookups.mentionById;
  const assertionById = lookups.assertionById;

  validateTokenSegmentAlignment(doc, segmentById);
  validateMentionReferences(doc, tokenById);
  validateMentionSegmentAlignment(doc, segmentById, tokenById);
  validateAssertionCrossFieldAlignment(doc, segmentById, mentionById, tokenById);

  const assertions = doc.assertions || [];
  for (const assertion of assertions) {
    // The id is passed through as-is; "<unknown>" stands in when it is missing.
    const label = (assertion && assertion.id) || "<unknown>";
    validateAssertionReferences(assertion, label, mentionById, tokenById);
    validateAssertionDeterminism(assertion, label);
  }

  validateSuppressedReferences(doc, assertionById);
  validateCoverage(doc, mentionById, options);
}
|
|
38
|
+
|
|
39
|
+
module.exports = {
|
|
40
|
+
validateIntegrity,
|
|
41
|
+
};
|
|
@@ -0,0 +1,157 @@
|
|
|
1
|
+
const { failValidation } = require("./errors");
|
|
2
|
+
|
|
3
|
+
/**
 * Indexes `doc.segments` by id.
 *
 * Entries whose `id` is not a non-empty string are left out. When two entries
 * share an id, the later entry replaces the earlier one.
 *
 * @param {object} doc - Document that may carry a `segments` array.
 * @returns {Map<string, object>} Segment lookup keyed by id.
 */
function buildSegmentMap(doc) {
  const byId = new Map();
  (doc.segments || []).forEach((segment) => {
    const id = segment && segment.id;
    if (typeof id === "string" && id.length > 0) {
      byId.set(id, segment);
    }
  });
  return byId;
}
|
|
6
|
+
|
|
7
|
+
/**
 * Reports a validation failure unless `segmentId` is a string that is a key
 * of `segmentById`.
 *
 * @param {Map<string, object>} segmentById - Lookup of known segments.
 * @param {*} segmentId - Candidate segment id.
 * @param {string} code - Error code forwarded to `failValidation`.
 * @param {string} message - Error message forwarded to `failValidation`.
 * @returns {void}
 */
function ensureSegmentExists(segmentById, segmentId, code, message) {
  // The map is only consulted for string ids.
  const isKnownSegment = typeof segmentId === "string" && segmentById.has(segmentId);
  if (!isKnownSegment) {
    failValidation(code, message);
  }
}
|
|
12
|
+
|
|
13
|
+
/**
 * Checks, via `ensureSegmentExists`, that every token's `segment_id` refers
 * to a known segment.
 *
 * @param {object} doc - Document that may carry a `tokens` array.
 * @param {Map<string, object>} segmentById - Lookup of known segments.
 * @returns {void}
 */
function validateTokenSegmentAlignment(doc, segmentById) {
  const tokens = doc.tokens || [];
  for (const token of tokens) {
    const label = String((token && token.id) || "<unknown>");
    const message = `Integrity error: token ${label} references unknown segment_id.`;
    ensureSegmentExists(segmentById, token && token.segment_id, "EA_VALIDATE_UNKNOWN_SEGMENT_REFERENCE", message);
  }
}
|
|
24
|
+
|
|
25
|
+
/**
 * Checks that every mention is consistent with its segment and its tokens.
 *
 * For each mention, in order:
 *   1. `segment_id` must refer to a known segment (`ensureSegmentExists`);
 *   2. every resolvable token in `token_ids` must share the mention's
 *      `segment_id` (ids missing from `tokenById` are skipped here);
 *   3. the head token, when resolvable, must share the mention's `segment_id`;
 *   4. when the mention has a numeric span, it must lie inside the segment's
 *      span and must cover the span of all resolvable tokens.
 *
 * A segment without a `span` object is treated as having no bounds, so the
 * bounds comparison is skipped instead of raising a TypeError; the token
 * coverage check still runs.
 *
 * @param {object} doc - Document that may carry a `mentions` array.
 * @param {Map<string, object>} segmentById - Lookup of known segments.
 * @param {Map<string, object>} tokenById - Lookup of known tokens.
 * @returns {void}
 */
function validateMentionSegmentAlignment(doc, segmentById, tokenById) {
  for (const mention of doc.mentions || []) {
    const mentionId = String((mention && mention.id) || "<unknown>");
    const mentionSegmentId = mention && mention.segment_id;
    ensureSegmentExists(
      segmentById,
      mentionSegmentId,
      "EA_VALIDATE_UNKNOWN_SEGMENT_REFERENCE",
      `Integrity error: mention ${mentionId} references unknown segment_id.`
    );

    const segment = segmentById.get(mentionSegmentId);
    const tokenIds = Array.isArray(mention && mention.token_ids) ? mention.token_ids : [];

    // Track the extent of all resolvable tokens that carry numeric span bounds.
    let minStart = Infinity;
    let maxEnd = -Infinity;
    for (const tokenId of tokenIds) {
      const token = tokenById.get(tokenId);
      if (!token) continue;
      if (token.segment_id !== mentionSegmentId) {
        failValidation(
          "EA_VALIDATE_MENTION_SEGMENT_MISMATCH",
          `Integrity error: mention ${mentionId} includes token ${tokenId} from another segment.`
        );
      }
      if (token.span && typeof token.span.start === "number" && token.span.start < minStart) minStart = token.span.start;
      if (token.span && typeof token.span.end === "number" && token.span.end > maxEnd) maxEnd = token.span.end;
    }

    if (mention && mention.head_token_id) {
      const headToken = tokenById.get(mention.head_token_id);
      if (headToken && headToken.segment_id !== mentionSegmentId) {
        failValidation(
          "EA_VALIDATE_MENTION_SEGMENT_MISMATCH",
          `Integrity error: mention ${mentionId} head token is not in mention.segment_id.`
        );
      }
    }

    const mentionSpan = mention && mention.span;
    const hasNumericSpan =
      Boolean(mentionSpan) && typeof mentionSpan.start === "number" && typeof mentionSpan.end === "number";
    if (!segment || !hasNumericSpan) continue;

    // Comparisons against a missing bound are false, so a span-less segment imposes no bounds.
    const segmentSpan = segment.span || {};
    if (mentionSpan.start < segmentSpan.start || mentionSpan.end > segmentSpan.end) {
      failValidation(
        "EA_VALIDATE_MENTION_SPAN_SEGMENT_BOUNDS",
        `Integrity error: mention ${mentionId} span falls outside its segment bounds.`
      );
    }
    if (Number.isFinite(minStart) && Number.isFinite(maxEnd)) {
      if (mentionSpan.start > minStart || mentionSpan.end < maxEnd) {
        failValidation(
          "EA_VALIDATE_MENTION_SPAN_TOKEN_COVERAGE",
          `Integrity error: mention ${mentionId} span must cover all mention token spans.`
        );
      }
    }
  }
}
|
|
88
|
+
|
|
89
|
+
/**
 * Checks that every assertion is consistent with the segment it belongs to.
 *
 * For each assertion, in order:
 *   1. `segment_id` must refer to a known segment (`ensureSegmentExists`);
 *   2. the predicate mention, when resolvable, must be in the same segment;
 *   3. `predicate.head_token_id` must equal the predicate mention's head token;
 *   4. the predicate head token, when resolvable, must be in the same segment;
 *   5. every resolvable mention referenced from `arguments`, then from
 *      `modifiers`, must be in the same segment.
 * Ids that cannot be resolved through the lookup maps are skipped here.
 *
 * @param {object} doc - Document that may carry an `assertions` array.
 * @param {Map<string, object>} segmentById - Lookup of known segments.
 * @param {Map<string, object>} mentionById - Lookup of known mentions.
 * @param {Map<string, object>} tokenById - Lookup of known tokens.
 * @returns {void}
 */
function validateAssertionCrossFieldAlignment(doc, segmentById, mentionById, tokenById) {
  const segmentMismatchCode = "EA_VALIDATE_ASSERTION_SEGMENT_MISMATCH";

  for (const assertion of doc.assertions || []) {
    const assertionId = String((assertion && assertion.id) || "<unknown>");
    const ownSegmentId = assertion && assertion.segment_id;
    ensureSegmentExists(
      segmentById,
      ownSegmentId,
      "EA_VALIDATE_UNKNOWN_SEGMENT_REFERENCE",
      `Integrity error: assertion ${assertionId} references unknown segment_id.`
    );

    const rawPredicate = assertion && assertion.predicate;
    const predicate = rawPredicate && typeof rawPredicate === "object" ? rawPredicate : {};

    const predicateMention = mentionById.get(predicate.mention_id);
    if (predicateMention) {
      if (predicateMention.segment_id !== ownSegmentId) {
        failValidation(segmentMismatchCode, `Integrity error: assertion ${assertionId} predicate mention segment mismatch.`);
      }
      if (predicate.head_token_id !== predicateMention.head_token_id) {
        failValidation(
          "EA_VALIDATE_PREDICATE_HEAD_MISMATCH",
          `Integrity error: assertion ${assertionId} predicate.head_token_id does not match mention head token.`
        );
      }
    }

    const predicateHeadToken = tokenById.get(predicate.head_token_id);
    if (predicateHeadToken && predicateHeadToken.segment_id !== ownSegmentId) {
      failValidation(segmentMismatchCode, `Integrity error: assertion ${assertionId} predicate head token segment mismatch.`);
    }

    // Arguments are checked before modifiers; each list carries its own message builder.
    const referenceLists = [
      [
        (assertion && assertion.arguments) || [],
        (mentionId) => `Integrity error: assertion ${assertionId} argument mention ${mentionId} segment mismatch.`,
      ],
      [
        (assertion && assertion.modifiers) || [],
        (mentionId) => `Integrity error: assertion ${assertionId} modifier mention ${mentionId} segment mismatch.`,
      ],
    ];
    for (const [entries, describeMismatch] of referenceLists) {
      for (const entry of entries) {
        const mentionIds = Array.isArray(entry && entry.mention_ids) ? entry.mention_ids : [];
        for (const mentionId of mentionIds) {
          const mention = mentionById.get(mentionId);
          if (mention && mention.segment_id !== ownSegmentId) {
            failValidation(segmentMismatchCode, describeMismatch(mentionId));
          }
        }
      }
    }
  }
}
|
|
151
|
+
|
|
152
|
+
module.exports = {
|
|
153
|
+
buildSegmentMap,
|
|
154
|
+
validateTokenSegmentAlignment,
|
|
155
|
+
validateMentionSegmentAlignment,
|
|
156
|
+
validateAssertionCrossFieldAlignment,
|
|
157
|
+
};
|