elementary-assertions 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. package/CHANGELOG.md +353 -0
  2. package/LICENSE +21 -0
  3. package/README.md +211 -0
  4. package/bin/elementary-assertions.js +8 -0
  5. package/docs/DEV_TOOLING.md +98 -0
  6. package/docs/NPM_RELEASE.md +177 -0
  7. package/docs/OPERATIONAL.md +159 -0
  8. package/docs/RELEASE_NOTES_TEMPLATE.md +37 -0
  9. package/docs/REPO_WORKFLOWS.md +48 -0
  10. package/package.json +46 -0
  11. package/src/core/accepted-annotations.js +44 -0
  12. package/src/core/assertions.js +2304 -0
  13. package/src/core/determinism.js +95 -0
  14. package/src/core/diagnostics.js +496 -0
  15. package/src/core/ids.js +9 -0
  16. package/src/core/mention-builder.js +272 -0
  17. package/src/core/mention-evidence.js +52 -0
  18. package/src/core/mention-head-resolution.js +108 -0
  19. package/src/core/mention-materialization.js +31 -0
  20. package/src/core/mentions.js +149 -0
  21. package/src/core/output.js +296 -0
  22. package/src/core/projection.js +192 -0
  23. package/src/core/roles.js +164 -0
  24. package/src/core/strings.js +7 -0
  25. package/src/core/tokens.js +53 -0
  26. package/src/core/upstream.js +31 -0
  27. package/src/index.js +6 -0
  28. package/src/render/index.js +5 -0
  29. package/src/render/layouts/compact.js +10 -0
  30. package/src/render/layouts/meaning.js +7 -0
  31. package/src/render/layouts/readable.js +7 -0
  32. package/src/render/layouts/table.js +7 -0
  33. package/src/render/render.js +931 -0
  34. package/src/run.js +278 -0
  35. package/src/schema/seed.elementary-assertions.schema.json +1751 -0
  36. package/src/tools/cli.js +158 -0
  37. package/src/tools/index.js +6 -0
  38. package/src/tools/io.js +55 -0
  39. package/src/validate/ajv.js +20 -0
  40. package/src/validate/coverage.js +215 -0
  41. package/src/validate/determinism.js +115 -0
  42. package/src/validate/diagnostics-strict.js +392 -0
  43. package/src/validate/errors.js +19 -0
  44. package/src/validate/index.js +20 -0
  45. package/src/validate/integrity.js +41 -0
  46. package/src/validate/invariants.js +157 -0
  47. package/src/validate/references.js +110 -0
  48. package/src/validate/schema.js +50 -0
@@ -0,0 +1,2304 @@
1
+ const { sha256Hex, canonicalizeOperatorsForHash, stableObjectKey, normalizeIds, dedupeAndSortEvidence } = require('./determinism');
2
+ const { roleToSlot, isCompareLabel, isQuantifierLabel, chooseBestMentionForToken, buildAssertionWikiSignals, isSubjectRoleLabel } = require('./mentions');
3
+ const { buildCoordinationGroups } = require('./projection');
4
+ const { mergeOperator } = require('./diagnostics');
5
+ const {
6
+ argumentRolePriority,
7
+ modifierRolePriority,
8
+ collectEntryTokenIds,
9
+ canonicalizeRoleEntries,
10
+ collectAssertionMentionRefs,
11
+ } = require('./roles');
12
+
13
/**
 * Tests whether a part-of-speech tag belongs to the verb family.
 *
 * @param {*} tag - Candidate POS tag.
 * @returns {boolean} True only when `tag` is a string that begins with "VB".
 */
function isVerbPosTag(tag) {
  if (typeof tag !== 'string') return false;
  return tag.startsWith('VB');
}
16
+
17
/**
 * Tests whether a tag is exactly one of the six verb tags
 * VB, VBD, VBG, VBN, VBP or VBZ (case-sensitive, no prefix matching).
 *
 * @param {*} tag - Candidate POS tag.
 * @returns {boolean} True when `tag` is one of the listed tags.
 */
function isLexicalVerbPos(tag) {
  const lexicalVerbTags = ['VB', 'VBD', 'VBG', 'VBN', 'VBP', 'VBZ'];
  return lexicalVerbTags.includes(tag);
}
20
+
21
/**
 * Assigns a coarse predicate class to a token from its surface form and POS tag.
 *
 * Checks run in a fixed order, and the first match wins:
 *   1. copula surface                    -> 'copula'
 *   2. tag MD or surface "given"         -> 'auxiliary'
 *   3. tag IN or TO                      -> 'preposition'
 *   4. one of the lexical verb tags      -> 'lexical_verb'
 *   5. anything else                     -> 'nominal_head'
 *
 * @param {object} [token] - Token with optional `surface` and `pos.tag` fields.
 * @returns {string} One of the five class labels above.
 */
function classifyPredicateClass(token) {
  const surfaceLc = lower(token && token.surface);
  const posInfo = (token || {}).pos || {};
  // The tag is upper-cased before comparison; the surface is lower-cased.
  const posTag = String(posInfo.tag || '').toUpperCase();

  if (isCopulaSurface(surfaceLc)) return 'copula';

  const looksAuxiliary = posTag === 'MD' || surfaceLc === 'given';
  if (looksAuxiliary) return 'auxiliary';

  switch (posTag) {
    case 'IN':
    case 'TO':
      return 'preposition';
    default:
      return isLexicalVerbPos(posTag) ? 'lexical_verb' : 'nominal_head';
  }
}
30
+
31
/**
 * Tests whether a tag is exactly one of NN, NNS, NNP, NNPS, PRP, PRP$ or CD.
 *
 * @param {*} tag - Candidate POS tag.
 * @returns {boolean} True only for a string equal to one of the listed tags.
 */
function isNounLikePosTag(tag) {
  if (typeof tag !== 'string') return false;
  const nounLikeTags = ['NN', 'NNS', 'NNP', 'NNPS', 'PRP', 'PRP$', 'CD'];
  return nounLikeTags.includes(tag);
}
34
+
35
/**
 * Tests whether a surface form is one of the copula forms recognised here:
 * is, are, was, were, be, been, being (compared case-insensitively).
 *
 * @param {*} surface - Token surface text; falsy values are treated as ''.
 * @returns {boolean} True when the lower-cased surface is in the list.
 */
function isCopulaSurface(surface) {
  const copulaForms = ['is', 'are', 'was', 'were', 'be', 'been', 'being'];
  const normalized = String(surface || '').toLowerCase();
  return copulaForms.includes(normalized);
}
39
+
40
/**
 * Lower-cases a value after string coercion.
 *
 * @param {*} s - Any value; falsy values (including 0) become ''.
 * @returns {string} The lower-cased string form of `s`.
 */
function lower(s) {
  const text = s ? String(s) : '';
  return text.toLowerCase();
}
43
+
44
/**
 * Flags tokens that make weak predicates: any token tagged MD, or one whose
 * lower-cased surface is is/are/am/be/been/being/given.
 *
 * NOTE(review): unlike classifyPredicateClass, the tag is compared without
 * upper-casing, and the surface list differs from isCopulaSurface
 * ("am" is included here, "was"/"were" are not) — confirm this is intended.
 *
 * @param {object} [token] - Token with optional `surface` and `pos.tag` fields.
 * @returns {boolean} True when the token matches either rule.
 */
function isLowQualityPredicateToken(token) {
  const surfaceLc = lower(token && token.surface);
  const posInfo = (token || {}).pos || {};
  const posTag = String(posInfo.tag || '');
  if (posTag === 'MD') return true;
  const weakSurfaces = ['is', 'are', 'am', 'be', 'been', 'being', 'given'];
  return weakSurfaces.includes(surfaceLc);
}
50
+
51
/**
 * Detects a "make sure" scaffold: the predicate token's surface is "make",
 * it is the dependent of a clause-linking relation, and the first
 * non-punctuation token among the next three tokens in its segment is "sure".
 *
 * @param {object} args
 * @param {object} args.predTok - Predicate token (`surface`, `id`, `segment_id` are read).
 * @param {Array<object>} args.projected - Relations with `dep_token_id` and `label`.
 * @param {Map<string, Array<object>>} args.tokensBySegment - Segment id -> tokens
 *   (only `.get` is used).
 * @returns {boolean} True when all three conditions hold.
 */
function isMakeSureScaffoldPredicate({ predTok, projected, tokensBySegment }) {
  if (lower(predTok && predTok.surface) !== 'make') return false;

  const clauseLinkLabels = ['complement_clause', 'xcomp', 'ccomp', 'purpose'];
  const isClauseLinkIntoPredicate = (rel) =>
    Boolean(rel) && rel.dep_token_id === predTok.id && clauseLinkLabels.includes(rel.label);
  if (!projected.some(isClauseLinkIntoPredicate)) return false;

  const segmentTokens = tokensBySegment.get(predTok.segment_id) || [];
  const predIndex = segmentTokens.findIndex((t) => t && t.id === predTok.id);
  if (predIndex < 0) return false;

  // Look at most three tokens ahead; skip blanks and , ; : and decide on the
  // first remaining token.
  const skippable = ['', ',', ';', ':'];
  const lookahead = segmentTokens.slice(predIndex + 1, predIndex + 4);
  for (const candidate of lookahead) {
    const surfaceLc = lower(candidate && candidate.surface);
    if (skippable.includes(surfaceLc)) continue;
    return surfaceLc === 'sure';
  }
  return false;
}
70
+
71
/**
 * Reports whether every role bucket is present as an array and holds nothing.
 *
 * A bucket that is missing or not an array makes the result false.
 *
 * @param {object} roleBuckets - Object with actor/theme/attr/topic/location/other.
 * @returns {boolean} True when all six buckets are empty arrays.
 */
function roleBucketsAreSemanticallyEmpty(roleBuckets) {
  const bucketNames = ['actor', 'theme', 'attr', 'topic', 'location', 'other'];
  return bucketNames.every((name) => {
    const bucket = roleBuckets[name];
    return Array.isArray(bucket) && bucket.length === 0;
  });
}
81
+
82
/**
 * Regroups an assertion's `arguments` and `modifiers` entries into role buckets.
 *
 * Argument roles map to buckets as follows: 'actor' (or any role accepted by
 * isSubjectRoleLabel) -> actor, 'theme' -> theme, 'attribute' -> attr,
 * 'topic' -> topic, 'location' -> location. Every other argument, and every
 * modifier with a non-empty role, becomes a `{ role, mention_ids }` record in
 * `other`. Entries without mention ids are ignored. `other` is sorted by role
 * and then by the JSON text of its mention ids.
 *
 * @param {object} [assertion] - Assertion with optional `arguments` / `modifiers` arrays.
 * @returns {{actor: string[], theme: string[], attr: string[], topic: string[],
 *   location: string[], other: Array<{role: string, mention_ids: string[]}>}}
 */
function assertionRoleBuckets(assertion) {
  const buckets = { actor: [], theme: [], attr: [], topic: [], location: [], other: [] };
  const coreBucketByRole = new Map([
    ['theme', 'theme'],
    ['attribute', 'attr'],
    ['topic', 'topic'],
    ['location', 'location'],
  ]);
  const entriesOf = (field) =>
    (assertion && Array.isArray(assertion[field]) ? assertion[field] : []);
  const idsOf = (entry) =>
    normalizeIds(Array.isArray(entry && entry.mention_ids) ? entry.mention_ids : []);

  for (const argument of entriesOf('arguments')) {
    const role = String((argument && argument.role) || '');
    const ids = idsOf(argument);
    if (ids.length === 0) continue;

    // The actor/subject test takes precedence over the table lookup.
    let bucketName = null;
    if (role === 'actor' || isSubjectRoleLabel(role)) bucketName = 'actor';
    else if (coreBucketByRole.has(role)) bucketName = coreBucketByRole.get(role);

    if (bucketName === null) {
      buckets.other.push({ role, mention_ids: ids });
    } else {
      buckets[bucketName] = normalizeIds(buckets[bucketName].concat(ids));
    }
  }

  for (const modifier of entriesOf('modifiers')) {
    const role = String((modifier && modifier.role) || '');
    const ids = idsOf(modifier);
    if (role && ids.length > 0) buckets.other.push({ role, mention_ids: ids });
  }

  const byRoleThenIds = (left, right) => {
    if (left.role !== right.role) return left.role.localeCompare(right.role);
    return JSON.stringify(left.mention_ids).localeCompare(JSON.stringify(right.mention_ids));
  };
  buckets.other = buckets.other
    .map((record) => ({ role: record.role, mention_ids: normalizeIds(record.mention_ids || []) }))
    .filter((record) => record.mention_ids.length > 0)
    .sort(byRoleThenIds);

  return buckets;
}
110
+
111
/**
 * Converts role buckets back into canonical `arguments` / `modifiers` entries.
 *
 * Each non-empty core bucket (actor, theme, attr, topic, location) yields one
 * argument entry; `attr` is emitted under the role name 'attribute'. Each
 * `other` record with a non-blank role and at least one mention id yields one
 * modifier entry. Every entry gets empty `relation_ids` and token ids gathered
 * by collectEntryTokenIds. Final ordering is delegated to
 * canonicalizeRoleEntries with the matching role-priority argument.
 *
 * @param {object} [roleBuckets] - Buckets as produced by assertionRoleBuckets.
 * @param {*} mentionById - Passed through to collectEntryTokenIds.
 * @returns {{arguments: *, modifiers: *}} Canonical role payload.
 */
function canonicalizeRoleBuckets(roleBuckets, mentionById) {
  const buckets = roleBuckets || {};
  const buildEntry = (role, mentionIds) => ({
    role,
    mention_ids: mentionIds,
    evidence: {
      relation_ids: [],
      token_ids: collectEntryTokenIds(mentionIds, mentionById),
    },
  });

  // [bucket key, emitted role name]
  const coreSlots = [
    ['actor', 'actor'],
    ['theme', 'theme'],
    ['attr', 'attribute'],
    ['topic', 'topic'],
    ['location', 'location'],
  ];
  const argumentEntries = [];
  for (const [bucketKey, roleName] of coreSlots) {
    const ids = normalizeIds(Array.isArray(buckets[bucketKey]) ? buckets[bucketKey] : []);
    if (ids.length > 0) argumentEntries.push(buildEntry(roleName, ids));
  }

  const modifierEntries = [];
  const residual = Array.isArray(buckets.other) ? buckets.other : [];
  for (const record of residual) {
    const roleName = String((record && record.role) || '').trim();
    const ids = normalizeIds(Array.isArray(record && record.mention_ids) ? record.mention_ids : []);
    if (roleName && ids.length > 0) modifierEntries.push(buildEntry(roleName, ids));
  }

  return {
    arguments: canonicalizeRoleEntries(argumentEntries, argumentRolePriority),
    modifiers: canonicalizeRoleEntries(modifierEntries, modifierRolePriority),
  };
}
154
+
155
/**
 * Serialises the role payload to JSON, keeping only `arguments` and
 * `modifiers`, in that key order.
 *
 * @param {{arguments: *, modifiers: *}} rolePayload - Canonical role payload.
 * @returns {string} JSON text of the two fields.
 */
function rolePayloadHashInput(rolePayload) {
  const { arguments: argumentEntries, modifiers: modifierEntries } = rolePayload;
  const hashable = { arguments: argumentEntries, modifiers: modifierEntries };
  return JSON.stringify(hashable);
}
158
+
159
/**
 * Overwrites an assertion's `arguments` and `modifiers` with the canonical
 * form of the given role buckets. Mutates `assertion` in place.
 *
 * @param {object} assertion - Assertion to update.
 * @param {object} roleBuckets - Buckets to canonicalise.
 * @param {*} mentionById - Passed through to canonicalizeRoleBuckets.
 * @returns {void}
 */
function applyRoleBucketsToAssertion(assertion, roleBuckets, mentionById) {
  const { arguments: argumentEntries, modifiers: modifierEntries } =
    canonicalizeRoleBuckets(roleBuckets, mentionById);
  assertion.arguments = argumentEntries;
  assertion.modifiers = modifierEntries;
}
164
+
165
/**
 * Tests whether a token's surface is one of the punctuation marks treated as
 * a clause boundary: . , ; : ! ?
 *
 * @param {object} [token] - Token with an optional `surface` field.
 * @returns {boolean} True when the surface is exactly one of those marks.
 */
function isClauseBoundaryToken(token) {
  const boundaryMarks = ['.', ',', ';', ':', '!', '?'];
  return boundaryMarks.includes(lower(token && token.surface));
}
176
+
177
/**
 * Builds a key naming the clause window around an assertion's predicate.
 *
 * Starting at the predicate head token, the window grows left and right
 * through the segment's tokens until the neighbouring token is a clause
 * boundary or the segment ends. The key is `segment|leftTokenId|rightTokenId`;
 * a numeric index stands in for an id when the slot holds no token. If the
 * segment id or predicate head id is missing, or the head is not found in the
 * segment, the key is `segment|window:unknown`.
 *
 * @param {object} [assertion] - Assertion with `segment_id` and `predicate.head_token_id`.
 * @param {*} tokenById - Unused by this function.
 * @param {Map<string, Array<object>>} tokensBySegment - Segment id -> tokens
 *   (only `.get` is used).
 * @returns {string} The window key.
 */
function assertionClauseWindowKey(assertion, tokenById, tokensBySegment) {
  const segmentId = String((assertion && assertion.segment_id) || '');
  const predicate = (assertion || {}).predicate || {};
  const predTokenId = String(predicate.head_token_id || '');
  const unknownKey = `${segmentId}|window:unknown`;
  if (!segmentId || !predTokenId) return unknownKey;

  const segmentTokens = tokensBySegment.get(segmentId) || [];
  const predIndex = segmentTokens.findIndex((t) => t && t.id === predTokenId);
  if (predIndex < 0) return unknownKey;

  let windowStart = predIndex;
  for (let i = predIndex - 1; i >= 0 && !isClauseBoundaryToken(segmentTokens[i]); i -= 1) {
    windowStart = i;
  }
  let windowEnd = predIndex;
  for (let i = predIndex + 1; i < segmentTokens.length && !isClauseBoundaryToken(segmentTokens[i]); i += 1) {
    windowEnd = i;
  }

  const edgeLabel = (index) => {
    const edgeToken = segmentTokens[index];
    return edgeToken ? edgeToken.id : String(index);
  };
  return `${segmentId}|${edgeLabel(windowStart)}|${edgeLabel(windowEnd)}`;
}
192
+
193
/**
 * Reports whether an assertion carries an operator whose kind is
 * 'modality', 'negation' or 'coordination_group'.
 *
 * @param {object} [assertion] - Assertion with an optional `operators` array.
 * @returns {boolean} True when at least one such operator is present.
 */
function assertionHasBlockingOperators(assertion) {
  const blockingKinds = ['modality', 'negation', 'coordination_group'];
  const operators = Array.isArray(assertion && assertion.operators) ? assertion.operators : [];
  for (const operator of operators) {
    const kind = String((operator && operator.kind) || '');
    if (blockingKinds.includes(kind)) return true;
  }
  return false;
}
200
+
201
/**
 * Adds mention ids to the host assertion's `other` bucket under `role`, then
 * rewrites the host's arguments/modifiers from the updated buckets.
 *
 * If a record with that role already exists (the first match is used), the ids
 * are merged into it; otherwise a new record is appended. The host is left
 * untouched when there is no host, no usable id, or the merge changes nothing.
 *
 * @param {object} [host] - Assertion to mutate.
 * @param {string} role - Role name of the `other` record.
 * @param {Array<*>} [mentionIds] - Candidate ids; only non-empty strings are kept.
 * @param {*} mentionById - Passed through to applyRoleBucketsToAssertion.
 * @returns {boolean} True when the host was rewritten.
 */
function addToOtherSlot(host, role, mentionIds, mentionById) {
  if (!host) return false;

  const buckets = assertionRoleBuckets(host);
  const incoming = normalizeIds(
    (mentionIds || []).filter((id) => typeof id === 'string' && id.length > 0)
  );
  if (incoming.length === 0) return false;
  if (!Array.isArray(buckets.other)) buckets.other = [];

  const target = buckets.other.find((record) => record && record.role === role);
  if (target) {
    const previous = target.mention_ids || [];
    const combined = normalizeIds(previous.concat(incoming));
    const unchanged =
      combined.length === previous.length && combined.every((id, i) => id === previous[i]);
    if (unchanged) return false;
    target.mention_ids = combined;
  } else {
    buckets.other.push({ role, mention_ids: incoming });
  }

  buckets.other.sort((left, right) =>
    String(left.role || '').localeCompare(String(right.role || ''))
  );
  applyRoleBucketsToAssertion(host, buckets, mentionById);
  return true;
}
223
+
224
/**
 * Copies the source assertion's role mentions onto the host as
 * `attached_<name>` entries in the host's `other` bucket.
 *
 * Core buckets (actor, theme, attr, topic, location) are handled first, then
 * each `other` record (role `attached_<record role>`, or `attached_other` when
 * the record has no role). Mention ids the host already references are skipped.
 *
 * @param {object} source - Assertion whose buckets are read.
 * @param {object} host - Assertion that receives the mentions (mutated).
 * @param {*} mentionById - Passed through to addToOtherSlot.
 * @returns {*} normalizeIds() of the bucket names that contributed something
 *   ('other' stands for any `other` record).
 */
function transferRoleCarrierBucketsToHost(source, host, mentionById) {
  const movedBuckets = new Set();
  const hostMentionIds = collectAssertionMentionRefs(host);
  const sourceBuckets = assertionRoleBuckets(source) || {};

  // Shared tail of both loops: attach whatever the host does not reference yet.
  const attach = (bucketLabel, targetRole, ids) => {
    if (ids.length === 0) return;
    const fresh = ids.filter((id) => !hostMentionIds.has(id));
    if (fresh.length === 0) return;
    if (!addToOtherSlot(host, targetRole, fresh, mentionById)) return;
    movedBuckets.add(bucketLabel);
    fresh.forEach((id) => hostMentionIds.add(id));
  };

  const coreBucketNames = ['actor', 'theme', 'attr', 'topic', 'location'];
  for (const bucketName of coreBucketNames) {
    const ids = normalizeIds(
      (sourceBuckets[bucketName] || []).filter((id) => typeof id === 'string' && id.length > 0)
    );
    attach(bucketName, `attached_${bucketName}`, ids);
  }

  for (const record of sourceBuckets.other || []) {
    const targetRole = `attached_${String((record && record.role) || 'other')}`;
    const ids = normalizeIds((record && record.mention_ids) || []);
    attach('other', targetRole, ids);
  }

  return normalizeIds(Array.from(movedBuckets));
}
251
/**
 * Gathers the mention ids held by the requested role buckets of an assertion.
 *
 * The name 'other' selects the ids of every `other` record; any other name is
 * read directly as a bucket of ids.
 *
 * @param {object} source - Assertion whose buckets are read.
 * @param {string[]} [includeSlots] - Bucket names to include.
 * @returns {*} normalizeIds() of the distinct ids collected.
 */
function collectRoleBucketMentionIds(source, includeSlots) {
  const collected = new Set();
  const buckets = assertionRoleBuckets(source);
  const addAll = (ids) => {
    for (const id of ids || []) collected.add(id);
  };

  for (const bucketName of includeSlots || []) {
    if (bucketName !== 'other') {
      addAll(buckets[bucketName]);
      continue;
    }
    for (const record of buckets.other || []) addAll(record.mention_ids);
  }

  return normalizeIds(Array.from(collected));
}
265
+
266
/**
 * Moves mentions from selected source buckets into named `other` roles on the host.
 *
 * Each mapping item `{ from, to }` names a source bucket and the host role to
 * attach under; `from: 'other'` applies to every `other` record of the source.
 * Items with a blank `from` or `to` are skipped, as are mention ids the host
 * already references.
 *
 * @param {object} source - Assertion whose buckets are read.
 * @param {object} host - Assertion that receives the mentions (mutated).
 * @param {Array<{from: string, to: string}>} [mapping] - Bucket-to-role mapping.
 * @param {*} mentionById - Passed through to addToOtherSlot.
 * @returns {{transferred_buckets: *, transferred_mention_ids: *}}
 *   `transferred_buckets` lists source bucket names that contributed.
 *   `transferred_mention_ids` lists every source mention id examined whenever
 *   any were seen (not only the ones newly attached).
 */
function transferNamedBucketsToHostOther(source, host, mapping, mentionById) {
  const movedBuckets = new Set();
  const movedMentionIds = new Set();
  const examinedMentionIds = new Set();
  const hostMentionIds = collectAssertionMentionRefs(host);
  const sourceBuckets = assertionRoleBuckets(source);

  // Shared tail for both the 'other' and the core-bucket paths.
  const moveIds = (bucketLabel, targetRole, ids) => {
    if (ids.length === 0) return;
    ids.forEach((id) => examinedMentionIds.add(id));
    const fresh = ids.filter((id) => !hostMentionIds.has(id));
    if (fresh.length === 0) return;
    if (!addToOtherSlot(host, targetRole, fresh, mentionById)) return;
    movedBuckets.add(bucketLabel);
    for (const id of fresh) {
      hostMentionIds.add(id);
      movedMentionIds.add(id);
    }
  };

  for (const item of mapping || []) {
    const fromBucket = String((item && item.from) || '');
    const toRole = String((item && item.to) || '');
    if (!fromBucket || !toRole) continue;

    if (fromBucket === 'other') {
      for (const record of sourceBuckets.other || []) {
        moveIds('other', toRole, normalizeIds((record && record.mention_ids) || []));
      }
    } else {
      const ids = normalizeIds(
        (sourceBuckets[fromBucket] || []).filter((id) => typeof id === 'string' && id.length > 0)
      );
      moveIds(fromBucket, toRole, ids);
    }
  }

  const reportedIds = examinedMentionIds.size > 0 ? examinedMentionIds : movedMentionIds;
  return {
    transferred_buckets: normalizeIds(Array.from(movedBuckets)),
    transferred_mention_ids: normalizeIds(Array.from(reportedIds)),
  };
}
311
+
312
/**
 * Merges the source assertion's operators into the host's operator list.
 *
 * Host operators are merged first, then the source's, all through
 * mergeOperator. The result replaces `host.operators`, sorted by kind, value,
 * group_id, token_id and role.
 *
 * Nothing is transferred (and `[]` is returned) when the source has no
 * operators or when there is no host to write to. Sort keys are coerced to
 * strings so that a non-string field value cannot make the comparator throw.
 *
 * @param {object} [source] - Assertion whose `operators` are read.
 * @param {object} [host] - Assertion whose `operators` are replaced (mutated).
 * @returns {*} normalizeIds() of the distinct non-empty operator kinds found
 *   on the source, or `[]` when nothing was transferred.
 */
function transferOperatorsToHost(source, host) {
  const sourceOps = Array.isArray(source && source.operators) ? source.operators : [];
  if (sourceOps.length === 0) return [];
  // Without a host there is nowhere to store the merged operators.
  if (!host) return [];

  const opMap = new Map();
  for (const op of host.operators || []) mergeOperator(opMap, op);

  const transferredKinds = new Set();
  for (const op of sourceOps) {
    mergeOperator(opMap, op);
    const kind = String((op && op.kind) || '');
    if (kind) transferredKinds.add(kind);
  }

  const sortFields = ['kind', 'value', 'group_id', 'token_id', 'role'];
  host.operators = Array.from(opMap.values()).sort((a, b) => {
    for (const field of sortFields) {
      const left = String(a[field] || '');
      const right = String(b[field] || '');
      // The first differing field decides the order.
      if (left !== right) return left.localeCompare(right);
    }
    return 0;
  });

  return normalizeIds(Array.from(transferredKinds));
}
332
+
333
/**
 * Removes from the `other` bucket any mention id that already appears in the
 * theme, attr, topic or location bucket (the actor bucket is not consulted),
 * then rewrites the assertion from the cleaned buckets.
 *
 * Does nothing when there is no assertion or when those four buckets are all empty.
 *
 * @param {object} [assertion] - Assertion to mutate.
 * @param {*} mentionById - Passed through to applyRoleBucketsToAssertion.
 * @returns {void}
 */
function dedupeOtherMentionsAgainstCoreBuckets(assertion, mentionById) {
  if (!assertion) return;
  const buckets = assertionRoleBuckets(assertion);
  if (!Array.isArray(buckets.other)) return;

  let coreIds = [];
  for (const bucketName of ['theme', 'attr', 'topic', 'location']) {
    coreIds = coreIds.concat(buckets[bucketName] || []);
  }
  const coreIdSet = new Set(normalizeIds(coreIds));
  if (coreIdSet.size === 0) return;

  const survivors = buckets.other
    .filter((record) => record && typeof record.role === 'string')
    .map((record) => ({
      role: record.role,
      mention_ids: normalizeIds((record.mention_ids || []).filter((id) => !coreIdSet.has(id))),
    }))
    .filter((record) => record.mention_ids.length > 0);
  survivors.sort((left, right) =>
    String(left.role || '').localeCompare(String(right.role || ''))
  );

  buckets.other = survivors;
  applyRoleBucketsToAssertion(assertion, buckets, mentionById);
}
358
+
359
/**
 * Drops role mentions whose tokens collide with the predicate or with a
 * mention kept earlier, then rewrites the assertion from the cleaned buckets.
 *
 * Mentions are visited in priority order: actor, location, theme, attr, topic,
 * then each `other` record. Tokens of every kept mention become reserved.
 *
 * The collision rule depends on the predicate head token's POS tag:
 *  - verb tag (isVerbPosTag): a mention is dropped if it shares any token with
 *    the reserved set (predicate tokens plus tokens of kept mentions);
 *  - otherwise: a mention is dropped only if its token list has the same
 *    length as the predicate's token set and every token is in that set.
 *
 * @param {object} [assertion] - Assertion to mutate; ignored without a predicate.
 * @param {Map<string, object>} mentionById - Mention id -> mention (`token_ids`,
 *   `head_token_id` are read; only `.get` is used).
 * @param {Map<string, object>} tokenById - Token id -> token (only `.get` is used).
 * @returns {void}
 */
function enforceCoreBucketTokenDisjointness(assertion, mentionById, tokenById) {
  if (!assertion || !assertion.predicate) return;
  const roleBuckets = assertionRoleBuckets(assertion);

  const predMentionId = String(assertion.predicate.mention_id || '');
  const predMention = mentionById.get(predMentionId);
  const predHeadTokenId = String((predMention && predMention.head_token_id) || '');
  const predToken = predHeadTokenId ? tokenById.get(predHeadTokenId) : null;
  const predTag = String((((predToken || {}).pos) || {}).tag || '');
  const strictPredicateOverlap = isVerbPosTag(predTag);
  const predicateTokenIds = new Set(
    Array.isArray(predMention && predMention.token_ids) ? predMention.token_ids : []
  );
  // Grows as mentions are kept, so later mentions are checked against earlier ones.
  const reservedTokenIds = new Set(predicateTokenIds);

  // Single implementation of the overlap filter used for every bucket.
  const keepDisjointMentions = (mentionIds) => {
    const candidates = normalizeIds(
      (mentionIds || []).filter((id) => typeof id === 'string' && id.length > 0)
    );
    const kept = [];
    for (const mentionId of candidates) {
      const mention = mentionById.get(mentionId);
      const tokenIds = Array.isArray(mention && mention.token_ids) ? mention.token_ids : [];
      const overlaps = strictPredicateOverlap
        ? tokenIds.some((tid) => reservedTokenIds.has(tid))
        : (tokenIds.length === predicateTokenIds.size && tokenIds.every((tid) => predicateTokenIds.has(tid)));
      if (overlaps) continue;
      kept.push(mentionId);
      for (const tid of tokenIds) reservedTokenIds.add(tid);
    }
    return kept;
  };

  for (const slotName of ['actor', 'location', 'theme', 'attr', 'topic']) {
    roleBuckets[slotName] = normalizeIds(keepDisjointMentions(roleBuckets[slotName]));
  }

  const cleanedOther = [];
  for (const entry of Array.isArray(roleBuckets.other) ? roleBuckets.other : []) {
    if (!entry || typeof entry.role !== 'string') continue;
    const kept = keepDisjointMentions(entry.mention_ids);
    if (kept.length > 0) cleanedOther.push({ role: entry.role, mention_ids: kept });
  }
  cleanedOther.sort((a, b) => String(a.role || '').localeCompare(String(b.role || '')));
  roleBuckets.other = cleanedOther;

  applyRoleBucketsToAssertion(assertion, roleBuckets, mentionById);
}
412
+
413
/**
 * Narrows the role buckets of a low-quality copula assertion.
 *
 * Applies only when `diagnostics.predicate_quality` is 'low' and the predicate
 * mention's head token has a copula surface. A boundary is the first token
 * after the predicate head whose surface is
 * where/that/which/who/whom/whose/when/while or whose tag is WDT/WP/WP$/WRB.
 *
 *  - theme: reduced to one mention. Candidates ending before the boundary are
 *    preferred; if there are none, all theme mentions compete. The winner has
 *    the smallest first-token index, then fewest tokens, then shortest span,
 *    then the lowest id by localeCompare.
 *  - attr, topic, location, other: mentions not ending before the boundary
 *    are removed.
 *
 * With no boundary, every mention counts as "before" it.
 *
 * @param {object} [assertion] - Assertion to mutate.
 * @param {Map<string, object>} mentionById - Mention id -> mention (only `.get` is used).
 * @param {Map<string, object>} tokenById - Token id -> token; tokens carry a numeric `i`.
 * @param {Map<string, Array<object>>} tokensBySegment - Segment id -> tokens.
 * @returns {void}
 */
function pruneLowCopulaBuckets(assertion, mentionById, tokenById, tokensBySegment) {
  if (!assertion || !assertion.predicate) return;
  const buckets = assertionRoleBuckets(assertion);

  const diagnostics = assertion.diagnostics || {};
  if (String(diagnostics.predicate_quality || '') !== 'low') return;

  const predMention = mentionById.get(String(assertion.predicate.mention_id || ''));
  if (!predMention) return;
  const predToken = tokenById.get(predMention.head_token_id);
  if (!predToken || !isCopulaSurface(predToken.surface || '')) return;

  // Locate the first clause-opening token after the predicate head.
  const clauseOpeners = new Set(['where', 'that', 'which', 'who', 'whom', 'whose', 'when', 'while']);
  const whTags = new Set(['WDT', 'WP', 'WP$', 'WRB']);
  const segmentTokens = tokensBySegment.get(predMention.segment_id) || [];
  const boundaryToken = segmentTokens.find((tok) => {
    if (!tok || typeof tok.i !== 'number' || tok.i <= predToken.i) return false;
    const surfaceLc = String(tok.surface || '').toLowerCase();
    const tagUc = String((tok.pos || {}).tag || '').toUpperCase();
    return clauseOpeners.has(surfaceLc) || whTags.has(tagUc);
  });
  const boundaryI = boundaryToken ? boundaryToken.i : Number.POSITIVE_INFINITY;

  const boundsOf = (mentionId) => {
    const mention = mentionById.get(mentionId);
    if (!mention || !Array.isArray(mention.token_ids)) return null;
    const positions = mention.token_ids
      .map((tid) => tokenById.get(tid))
      .filter(Boolean)
      .map((tok) => tok.i);
    if (positions.length === 0) return null;
    const span = mention.span || {};
    return {
      minI: Math.min(...positions),
      maxI: Math.max(...positions),
      tokenCount: positions.length,
      spanLen: Number(span.end || 0) - Number(span.start || 0),
    };
  };

  const endsBeforeBoundary = (mentionId) => {
    if (!Number.isFinite(boundaryI)) return true;
    const bounds = boundsOf(mentionId);
    return bounds ? bounds.maxI < boundaryI : false;
  };

  const themeIds = normalizeIds(
    (buckets.theme || []).filter((id) => typeof id === 'string' && id.length > 0)
  );
  const themesInWindow = themeIds.filter(endsBeforeBoundary);
  const themePool = themesInWindow.length > 0 ? themesInWindow : themeIds;
  if (themePool.length > 0) {
    // Mentions without resolvable tokens sort last on every numeric key.
    const unresolved = Number.MAX_SAFE_INTEGER;
    const rankOf = (mentionId) => {
      const bounds = boundsOf(mentionId);
      return bounds
        ? [bounds.minI, bounds.tokenCount, bounds.spanLen]
        : [unresolved, unresolved, unresolved];
    };
    themePool.sort((a, b) => {
      const rankA = rankOf(a);
      const rankB = rankOf(b);
      for (let k = 0; k < rankA.length; k += 1) {
        if (rankA[k] !== rankB[k]) return rankA[k] - rankB[k];
      }
      return a.localeCompare(b);
    });
    buckets.theme = [themePool[0]];
  }

  for (const bucketName of ['attr', 'topic', 'location']) {
    buckets[bucketName] = normalizeIds((buckets[bucketName] || []).filter(endsBeforeBoundary));
  }

  if (Array.isArray(buckets.other)) {
    const survivors = [];
    for (const record of buckets.other) {
      if (!record || typeof record.role !== 'string') continue;
      const kept = normalizeIds((record.mention_ids || []).filter(endsBeforeBoundary));
      if (kept.length > 0) survivors.push({ role: record.role, mention_ids: kept });
    }
    survivors.sort((left, right) =>
      String(left.role || '').localeCompare(String(right.role || ''))
    );
    buckets.other = survivors;
  }

  applyRoleBucketsToAssertion(assertion, buckets, mentionById);
}
491
+
492
/**
 * Cleans up theme mentions that swallow too much of the sentence.
 *
 * For each theme mention with resolvable tokens:
 *  - it is dropped when it is the predicate mention itself;
 *  - it is dropped when the predicate head is a verb and the mention contains
 *    a verb token other than the predicate head;
 *  - when it has at least 5 tokens and contains a verb or a clause marker
 *    (that/which/who/where/before/while, or a comma by surface or tag), it is
 *    replaced by the best smaller mention nested inside it, or dropped if no
 *    such mention exists;
 *  - otherwise it is kept.
 * Theme mentions without resolvable tokens are dropped.
 *
 * A replacement must sit in the predicate's segment, contain only tokens of
 * the oversized theme, contain no verb, and have a noun-like head. Candidates
 * are ranked by: preferred side of the predicate (before it for a VBN
 * predicate, after it otherwise), not directly preceded by an IN/TO token,
 * more tokens, earlier start, then id.
 *
 * @param {object} [assertion] - Assertion to mutate; ignored without a predicate.
 * @param {Map<string, object>} mentionById - Mention id -> mention (`.get` and
 *   `.values` are used).
 * @param {Map<string, object>} tokenById - Token id -> token; tokens carry a numeric `i`.
 * @param {Map<string, Array<object>>} tokensBySegment - Segment id -> tokens.
 * @returns {void}
 */
function trimCatchAllThemeBuckets(assertion, mentionById, tokenById, tokensBySegment) {
  if (!assertion || !assertion.predicate) return;
  const buckets = assertionRoleBuckets(assertion);
  const themeIds = normalizeIds(
    (buckets.theme || []).filter((id) => typeof id === 'string' && id.length > 0)
  );
  if (themeIds.length === 0) return;

  const predMentionId = String(assertion.predicate.mention_id || '');
  const predMention = mentionById.get(predMentionId);
  if (!predMention) return;

  const rawTagOf = (tok) => String((((tok || {}).pos) || {}).tag || '');
  const isVerbToken = (tok) => isVerbPosTag(rawTagOf(tok));
  const orderedTokensOf = (tokenIds) =>
    tokenIds
      .map((tid) => tokenById.get(tid))
      .filter(Boolean)
      .sort((a, b) => a.i - b.i);

  const predToken = tokenById.get(predMention.head_token_id);
  const predTag = rawTagOf(predToken);
  const predIsVerb = isVerbPosTag(predTag);
  // Only the predicate head token counts as "the predicate's own verb".
  const predTokenIds = new Set(
    [String(predMention.head_token_id || '')].filter((id) => id.length > 0)
  );
  const segmentTokens = tokensBySegment.get(predMention.segment_id) || [];
  const tokenByI = new Map(segmentTokens.map((tok) => [Number(tok.i), tok]));
  const clauseMarkerSurfaces = ['that', 'which', 'who', 'where', 'before', 'while', ','];

  function describeMention(mentionId) {
    const mention = mentionById.get(mentionId);
    if (!mention || !Array.isArray(mention.token_ids)) return null;
    const tokens = orderedTokensOf(mention.token_ids);
    if (tokens.length === 0) return null;
    return {
      mention,
      tokenIds: tokens.map((tok) => tok.id),
      tokenCount: tokens.length,
      hasVerb: tokens.some(isVerbToken),
      hasForeignVerb: tokens.some(
        (tok) => isVerbToken(tok) && !predTokenIds.has(String(tok.id || ''))
      ),
      hasClauseMarkers: tokens.some((tok) => {
        const surfaceLc = String(tok.surface || '').toLowerCase();
        return clauseMarkerSurfaces.includes(surfaceLc) || rawTagOf(tok).toUpperCase() === ',';
      }),
    };
  }

  function pickNestedReplacement(themeInfo) {
    const themeTokenIds = new Set(themeInfo.tokenIds);
    const preferBeforePredicate = predTag === 'VBN';
    const predicateHasPosition = Boolean(predToken) && Number.isFinite(predToken.i);
    const ranked = [];

    for (const mention of mentionById.values()) {
      if (!mention) continue;
      if (mention.segment_id !== predMention.segment_id) continue;
      if (mention.id === themeInfo.mention.id) continue;

      const tokenIds = Array.isArray(mention.token_ids) ? mention.token_ids : [];
      if (tokenIds.length === 0) continue;
      if (tokenIds.some((tid) => !themeTokenIds.has(tid))) continue;

      const tokens = orderedTokensOf(tokenIds);
      if (tokens.length === 0) continue;
      if (tokens.some(isVerbToken)) continue;

      const headTag = rawTagOf(tokenById.get(mention.head_token_id));
      const nounLikeHead = isNounLikePosTag(headTag) || headTag === 'PRP' || headTag === 'PRP$';
      if (!nounLikeHead) continue;

      const startI = tokens[0].i;
      const precedingTag = rawTagOf(tokenByI.get(startI - 1)).toUpperCase();
      let sidePenalty = 1;
      if (predicateHasPosition) {
        const onPreferredSide = preferBeforePredicate ? startI < predToken.i : startI > predToken.i;
        sidePenalty = onPreferredSide ? 0 : 1;
      }

      ranked.push({
        mentionId: mention.id,
        tokenCount: tokens.length,
        prepPenalty: precedingTag === 'IN' || precedingTag === 'TO' ? 1 : 0,
        afterPredicate: sidePenalty,
        startI,
      });
    }

    ranked.sort((a, b) => {
      if (a.afterPredicate !== b.afterPredicate) return a.afterPredicate - b.afterPredicate;
      if (a.prepPenalty !== b.prepPenalty) return a.prepPenalty - b.prepPenalty;
      // Larger nested mentions win over smaller ones.
      if (a.tokenCount !== b.tokenCount) return b.tokenCount - a.tokenCount;
      if (a.startI !== b.startI) return a.startI - b.startI;
      return a.mentionId.localeCompare(b.mentionId);
    });
    return ranked.length > 0 ? ranked[0].mentionId : null;
  }

  const keptThemeIds = [];
  for (const themeId of themeIds) {
    const info = describeMention(themeId);
    if (!info) continue;
    if (themeId === predMentionId) continue;
    if (predIsVerb && info.hasForeignVerb) continue;

    const isOversized = info.tokenCount >= 5 && (info.hasClauseMarkers || info.hasVerb);
    if (!isOversized) {
      keptThemeIds.push(themeId);
      continue;
    }
    const replacementId = pickNestedReplacement(info);
    if (replacementId) keptThemeIds.push(replacementId);
  }

  buckets.theme = normalizeIds(keptThemeIds);
  applyRoleBucketsToAssertion(assertion, buckets, mentionById);
}
599
+
600
/**
 * Build a diagnostic record explaining whether `source` qualifies for
 * role-carrier suppression into a lexical-verb host of the same segment.
 *
 * Only assertions whose predicate class is `preposition`, `nominal_head`,
 * `auxiliary` or `copula` are traced; everything else yields `null`.
 * The function only reads its inputs; it picks the nearest host (by token
 * index distance, ties broken by assertion id) and reports which of the
 * source's non-operator evidence tokens the host's evidence does not contain.
 *
 * @param {object} args
 * @param {object} args.source - Assertion being inspected.
 * @param {Array<object>} args.assertions - Pool from which host candidates are drawn.
 * @param {Map<string, object>} args.tokenById - Token lookup keyed by token id.
 * @param {Map<string, *>} args.clauseKeyByAssertionId - Clause key per assertion id.
 * @returns {object|null} Trace record, or `null` when `source` has no predicate,
 *   is not one of the traced predicate classes, or its head token is unknown.
 */
function buildSuppressionEligibilityTrace({ source, assertions, tokenById, clauseKeyByAssertionId }) {
  // Guard before touching any helper: nothing below is meaningful without a predicate.
  if (!source || !source.predicate) return null;

  const sourceSlots = assertionRoleBuckets(source);
  const sourceDiagnostics = source.diagnostics || {};
  const sourceCls = String(sourceDiagnostics.predicate_class || '');
  const tracedClasses = ['preposition', 'nominal_head', 'auxiliary', 'copula'];
  if (!tracedClasses.includes(sourceCls)) return null;
  const sourceTok = tokenById.get(source.predicate.head_token_id);
  if (!sourceTok) return null;
  const sourceQuality = String(sourceDiagnostics.predicate_quality || '');

  // Keep only non-empty string ids, then normalize.
  const cleanIds = (ids) => normalizeIds((ids || []).filter((id) => typeof id === 'string' && id.length > 0));
  const evidenceTokenIdsOf = (assertion) => cleanIds(((assertion || {}).evidence || {}).token_ids);

  const sourceActorIds = cleanIds(sourceSlots.actor);
  const sourceThemeIds = cleanIds(sourceSlots.theme);
  const sourceAttrIds = cleanIds(sourceSlots.attr);
  const sourceTopicIds = cleanIds(sourceSlots.topic);
  const sourceLocationIds = cleanIds(sourceSlots.location);
  const sourceOther = Array.isArray(sourceSlots.other) ? sourceSlots.other : [];
  const sourceOps = Array.isArray(source.operators) ? source.operators : [];
  const hasOperatorKind = (kinds) => sourceOps.some((op) => kinds.includes(String((op && op.kind) || '')));
  const sourcePredicateMentionId = String(source.predicate.mention_id || '');

  // "Self-shaped": no actor at all, or the only actor is the predicate mention itself.
  const selfShapedActor =
    sourceActorIds.length === 0 ||
    (sourceActorIds.length === 1 && sourceActorIds[0] === sourcePredicateMentionId);
  const noCoreSlots =
    sourceActorIds.length === 0 &&
    sourceThemeIds.length === 0 &&
    sourceAttrIds.length === 0 &&
    sourceTopicIds.length === 0 &&
    sourceLocationIds.length === 0;
  const hasCoreSlots = !noCoreSlots;
  const hasOtherResidue = sourceOther.length > 0;
  const hasAnyOps = sourceOps.length > 0;
  const hasOperatorResidue = hasAnyOps;
  const residueModeValid = hasOtherResidue || hasOperatorResidue;
  const hasBlockingOps = assertionHasBlockingOperators(source);
  const hasCompareQuantOps = hasOperatorKind(['compare', 'compare_gt', 'compare_lt', 'quantifier']);
  const copulaLike = sourceCls === 'copula' || isCopulaSurface(sourceTok.surface || '');

  const nominalEligible =
    sourceCls === 'nominal_head' &&
    (sourceQuality === 'ok' || sourceQuality === 'low') &&
    selfShapedActor &&
    noCoreSlots &&
    residueModeValid;
  const prepositionEligible = sourceCls === 'preposition' && hasAnyOps === false;
  const lowAuxiliaryCarrierEligible =
    sourceCls === 'auxiliary' &&
    sourceQuality === 'low' &&
    selfShapedActor &&
    !hasCompareQuantOps;

  let copulaEligible = false;
  if (copulaLike && sourceQuality === 'low' && !hasCompareQuantOps) {
    const disallowedOps = hasOperatorKind([
      'modality',
      'negation',
      'coordination_group',
      'control_inherit_subject',
      'control_propagation',
    ]);
    // Deliberately uses the raw bucket lengths, not the cleaned id lists.
    const hasAttachableSlots =
      (sourceSlots.theme || []).length > 0 ||
      (sourceSlots.attr || []).length > 0 ||
      (sourceSlots.other || []).length > 0;
    copulaEligible = !disallowedOps && hasAttachableSlots;
  }

  let boundedPrepositionEligible = false;
  if (sourceCls === 'preposition' && noCoreSlots) {
    boundedPrepositionEligible = !hasOperatorKind(['modality', 'negation']);
  }
  const anyEligibleClass =
    nominalEligible ||
    prepositionEligible ||
    copulaEligible ||
    boundedPrepositionEligible ||
    lowAuxiliaryCarrierEligible;

  // Host candidates: lexical-verb assertions of the same segment, nearest first.
  const sourceClause = clauseKeyByAssertionId.get(source.id);
  const segmentHostCandidates = (assertions || [])
    .filter((host) => {
      if (!host || host.id === source.id) return false;
      if (host.segment_id !== source.segment_id) return false;
      return String((host.diagnostics || {}).predicate_class || '') === 'lexical_verb';
    })
    .map((host) => {
      // A host without a predicate or a known head token cannot be measured; drop it.
      const hostTok = tokenById.get((host.predicate || {}).head_token_id);
      if (!hostTok) return null;
      return {
        host,
        distance: Math.abs(Number(sourceTok.i) - Number(hostTok.i)),
      };
    })
    .filter(Boolean)
    .sort((a, b) => {
      if (a.distance !== b.distance) return a.distance - b.distance;
      return a.host.id.localeCompare(b.host.id);
    });
  const clauseHostCandidates = segmentHostCandidates.filter(
    (cand) => clauseKeyByAssertionId.get(cand.host.id) === sourceClause
  );
  // Prefer a host in the same clause; widen to the segment only for slot-less sources.
  let hostPool = clauseHostCandidates;
  if (hostPool.length === 0 && noCoreSlots) {
    hostPool = segmentHostCandidates;
  }
  const chosenHost = hostPool.length > 0 ? hostPool[0].host : null;

  // Containment: every non-operator evidence token of the source must appear in the host's evidence.
  const sourceOperatorTokenIds = new Set(
    sourceOps
      .map((op) => String((op && op.token_id) || ''))
      .filter((id) => id.length > 0)
  );
  const sourceEvidenceNonOperatorTokenIds = evidenceTokenIdsOf(source).filter(
    (id) => !sourceOperatorTokenIds.has(id)
  );
  const chosenHostTokenIds = evidenceTokenIdsOf(chosenHost);
  const hostEvidenceSet = new Set(chosenHostTokenIds);
  const missingInHostTokenIds = sourceEvidenceNonOperatorTokenIds.filter((id) => !hostEvidenceSet.has(id));
  const containmentPass = !!chosenHost && missingInHostTokenIds.length === 0;

  const containmentRequired =
    boundedPrepositionEligible ||
    (nominalEligible && !noCoreSlots) ||
    (lowAuxiliaryCarrierEligible && !noCoreSlots);
  const blockedByOps =
    hasBlockingOps && !boundedPrepositionEligible && !lowAuxiliaryCarrierEligible && !nominalEligible;
  const hasTransferableResidue = hasOtherResidue || hasOperatorResidue;
  const eligible =
    anyEligibleClass &&
    !blockedByOps &&
    !hasCoreSlots &&
    !!chosenHost &&
    (!containmentRequired || containmentPass) &&
    hasTransferableResidue;

  let failureReason = null;
  if (!eligible) {
    if (hasCoreSlots) failureReason = 'has_core_slots';
    else if (!chosenHost) failureReason = 'no_host';
    // NOTE(review): 'no_containment' is also the catch-all for class ineligibility,
    // blocking operators and missing residue; the emitted value is kept as-is
    // because downstream consumers may depend on this exact set of reasons.
    else failureReason = 'no_containment';
  }
  const chosenHostTok = chosenHost ? tokenById.get(chosenHost.predicate.head_token_id) : null;

  return {
    eligible,
    failure_reason: failureReason,
    candidate_class: sourceCls,
    segment_id: String(source.segment_id || ''),
    assertion_id: String(source.id || ''),
    chosen_host_assertion_id: chosenHost ? String(chosenHost.id || '') : null,
    chosen_host_predicate: chosenHostTok ? String(chosenHostTok.surface || '') : null,
    chosen_host_predicate_class: chosenHost
      ? String((chosenHost.diagnostics || {}).predicate_class || '')
      : null,
    source_non_operator_token_ids: sourceEvidenceNonOperatorTokenIds,
    chosen_host_token_ids: chosenHostTokenIds,
    missing_in_host_token_ids: missingInHostTokenIds,
  };
}
752
+
753
/**
 * Suppress "role carrier" assertions (eligible nominal-head, preposition,
 * low-quality auxiliary and copula-like predicates) by folding their buckets
 * and/or operators into the nearest lexical-verb host in the same segment.
 *
 * Sources are visited in assertion-id order. For each eligible source a host
 * is chosen (same clause first, nearest token index, ties by id), the transfer
 * helpers are invoked on the host, the host's evidence token ids are extended
 * with the source's, and a trace describing the redirect is recorded.
 * Host assertions are mutated in place by this function and by the transfer helpers.
 *
 * @param {object} args
 * @param {Array<object>} args.assertions - Assertions to process (the array itself is not modified).
 * @param {Map<string, object>} args.tokenById - Token lookup keyed by token id.
 * @param {*} args.tokensBySegment - Passed through to `assertionClauseWindowKey`.
 * @param {Map<string, object>} args.mentionById - Passed through to the transfer helpers.
 * @returns {{assertions: Array<object>, suppressedTraces: Array<object>}}
 *   Surviving assertions in input order, and traces sorted by suppressed assertion id.
 */
function suppressRoleCarrierAssertions({ assertions, tokenById, tokensBySegment, mentionById }) {
  const out = Array.isArray(assertions) ? assertions.slice() : [];
  const clauseKeyByAssertionId = new Map();
  const suppressedIds = new Set();
  const traces = [];

  // Keep only non-empty string ids, then normalize.
  const cleanIds = (ids) => normalizeIds((ids || []).filter((id) => typeof id === 'string' && id.length > 0));
  const evidenceTokenIdsOf = (assertion) => cleanIds(((assertion || {}).evidence || {}).token_ids);
  const diagnosticOf = (assertion, key) => String(((assertion || {}).diagnostics || {})[key] || '');
  const anyOperatorKind = (ops, kinds) => ops.some((op) => kinds.includes(String((op && op.kind) || '')));

  // Deterministic host ordering: nearest predicate first, assertion id as tie-break.
  const byDistanceThenId = (a, b) => {
    if (a.distance !== b.distance) return a.distance - b.distance;
    return a.host.id.localeCompare(b.host.id);
  };

  // Predicate for "host evidence contains every one of these token ids".
  const hostCovers = (tokenIds) => (cand) => {
    const hostEvidenceIds = new Set(evidenceTokenIdsOf(cand.host));
    return tokenIds.every((id) => hostEvidenceIds.has(id));
  };

  // Fresh bucket mappings per call so the transfer helper never sees a shared array.
  const allRoleBuckets = () => ['actor', 'theme', 'attr', 'topic', 'location', 'other'];
  const carrierBucketMap = () => [
    { from: 'theme', to: 'attached_theme' },
    { from: 'attr', to: 'attached_attr' },
    { from: 'topic', to: 'attached_topic' },
    { from: 'location', to: 'attached_location' },
    { from: 'other', to: 'attached_other' },
  ];

  // Move the named buckets, then the operators; operator kinds are reported as `operator:<kind>`.
  const transferBucketsAndOperators = (source, host, bucketMap) => {
    const transfer = transferNamedBucketsToHostOther(source, host, bucketMap, mentionById);
    const transferredOperatorKinds = transferOperatorsToHost(source, host);
    if (transferredOperatorKinds.length > 0) {
      transfer.transferred_buckets = normalizeIds(
        (transfer.transferred_buckets || []).concat(transferredOperatorKinds.map((k) => `operator:${k}`))
      );
    }
    return transfer;
  };

  for (const a of out) {
    clauseKeyByAssertionId.set(a.id, assertionClauseWindowKey(a, tokenById, tokensBySegment));
  }

  const candidates = out.slice().sort((a, b) => a.id.localeCompare(b.id));

  for (const source of candidates) {
    if (suppressedIds.has(source.id)) continue;
    const sourceSlots = assertionRoleBuckets(source);
    const sourceCls = diagnosticOf(source, 'predicate_class');
    const sourceQuality = diagnosticOf(source, 'predicate_quality');
    // An assertion without a predicate can be neither measured nor redirected.
    if (!source.predicate) continue;
    const sourceTok = tokenById.get(source.predicate.head_token_id);
    if (!sourceTok) continue;

    // Hosts: not-yet-suppressed lexical-verb assertions of the same segment.
    const sourceClause = clauseKeyByAssertionId.get(source.id);
    const segmentHostCandidates = out
      .filter((host) => {
        if (!host || host.id === source.id) return false;
        if (suppressedIds.has(host.id)) return false;
        if (host.segment_id !== source.segment_id) return false;
        return diagnosticOf(host, 'predicate_class') === 'lexical_verb';
      })
      .map((host) => {
        const hostTok = tokenById.get((host.predicate || {}).head_token_id);
        if (!hostTok) return null;
        const distance = Math.abs(Number(sourceTok.i) - Number(hostTok.i));
        return { host, hostTok, distance };
      })
      .filter(Boolean);
    if (segmentHostCandidates.length === 0) continue;
    const hostCandidates = segmentHostCandidates.filter(
      (cand) => clauseKeyByAssertionId.get(cand.host.id) === sourceClause
    );
    hostCandidates.sort(byDistanceThenId);

    const sourceActorIds = cleanIds(sourceSlots.actor);
    const sourceThemeIds = cleanIds(sourceSlots.theme);
    const sourceAttrIds = cleanIds(sourceSlots.attr);
    const sourceTopicIds = cleanIds(sourceSlots.topic);
    const sourceLocationIds = cleanIds(sourceSlots.location);
    const sourceOther = Array.isArray(sourceSlots.other) ? sourceSlots.other : [];
    const copulaLike = sourceCls === 'copula' || isCopulaSurface(sourceTok.surface || '');
    const sourceOps = Array.isArray(source.operators) ? source.operators : [];
    const hasBlockingOps = assertionHasBlockingOperators(source);
    const hasCompareQuantOps = anyOperatorKind(sourceOps, ['compare', 'compare_gt', 'compare_lt', 'quantifier']);
    const hasAnyOps = sourceOps.length > 0;
    const sourcePredicateMentionId = String(source.predicate.mention_id || '');

    // "Self-shaped": no actor at all, or the only actor is the predicate mention itself.
    const selfShapedActor =
      sourceActorIds.length === 0 ||
      (sourceActorIds.length === 1 && sourceActorIds[0] === sourcePredicateMentionId);
    const noCoreSlots =
      sourceActorIds.length === 0 &&
      sourceThemeIds.length === 0 &&
      sourceAttrIds.length === 0 &&
      sourceTopicIds.length === 0 &&
      sourceLocationIds.length === 0;
    const hasOtherResidue = sourceOther.length > 0;
    const hasOperatorResidue = hasAnyOps;
    const residueModeValid = hasOtherResidue || hasOperatorResidue;

    const nominalEligible =
      sourceCls === 'nominal_head' &&
      (sourceQuality === 'ok' || sourceQuality === 'low') &&
      selfShapedActor &&
      noCoreSlots &&
      residueModeValid;
    const prepositionEligible = sourceCls === 'preposition' && hasAnyOps === false;
    const lowAuxiliaryCarrierEligible =
      sourceCls === 'auxiliary' &&
      sourceQuality === 'low' &&
      selfShapedActor &&
      !hasCompareQuantOps;

    let copulaEligible = false;
    if (copulaLike && sourceQuality === 'low' && !hasCompareQuantOps) {
      const disallowedOps = anyOperatorKind(sourceOps, [
        'modality',
        'negation',
        'coordination_group',
        'control_inherit_subject',
        'control_propagation',
      ]);
      // Deliberately uses the raw bucket lengths, not the cleaned id lists.
      const hasAttachableSlots =
        (sourceSlots.theme || []).length > 0 ||
        (sourceSlots.attr || []).length > 0 ||
        (sourceSlots.other || []).length > 0;
      copulaEligible = !disallowedOps && hasAttachableSlots;
    }

    let boundedPrepositionEligible = false;
    if (sourceCls === 'preposition' && noCoreSlots) {
      boundedPrepositionEligible = !anyOperatorKind(sourceOps, ['modality', 'negation']);
    }

    const anyEligible =
      nominalEligible ||
      prepositionEligible ||
      copulaEligible ||
      boundedPrepositionEligible ||
      lowAuxiliaryCarrierEligible;
    if (!anyEligible) continue;
    if (hasBlockingOps && !boundedPrepositionEligible && !lowAuxiliaryCarrierEligible && !nominalEligible) continue;

    // Operator tokens are excluded from the containment check.
    const sourceOperatorTokenIds = new Set(
      sourceOps
        .map((op) => String((op && op.token_id) || ''))
        .filter((id) => id.length > 0)
    );
    const sourceEvidenceNonOperatorTokenIds = evidenceTokenIdsOf(source).filter(
      (id) => !sourceOperatorTokenIds.has(id)
    );

    let hostPool = hostCandidates.slice();
    const containmentRequired =
      boundedPrepositionEligible ||
      (nominalEligible && !noCoreSlots) ||
      (lowAuxiliaryCarrierEligible && !noCoreSlots);
    if (containmentRequired) {
      const covers = hostCovers(sourceEvidenceNonOperatorTokenIds);
      hostPool = hostPool.filter(covers);
      // Slot-less sources may fall back from the clause to the whole segment.
      if (hostPool.length === 0 && noCoreSlots) {
        hostPool = segmentHostCandidates.filter(covers);
      }
      hostPool.sort(byDistanceThenId);
    }
    if (hostPool.length === 0) continue;

    let chosen = hostPool[0];
    if (copulaEligible) {
      // A copula prefers a host that already references one of its actors;
      // without any actor, every host in the pool is acceptable.
      const scored = hostPool.map((h) => {
        const hostRefs = collectAssertionMentionRefs(h.host);
        const sharedActor = sourceActorIds.some((id) => hostRefs.has(id));
        return { ...h, sharedActor };
      });
      const withShared = scored.filter((x) => x.sharedActor);
      const pool = withShared.length > 0 ? withShared : (sourceActorIds.length === 0 ? scored : []);
      if (pool.length === 0) continue;
      pool.sort(byDistanceThenId);
      chosen = pool[0];
    }
    const host = chosen.host;

    // Branch order matters: the first matching eligibility decides the transfer shape.
    let transfer;
    let reason;
    if (copulaEligible) {
      transfer = transferNamedBucketsToHostOther(source, host, [
        { from: 'theme', to: 'attached_copula_theme' },
        { from: 'attr', to: 'attached_copula_attr' },
        { from: 'other', to: 'attached_copula_other' },
      ], mentionById);
      reason = 'copula_bucket_sink_suppressed';
    } else if (nominalEligible) {
      transfer = transferBucketsAndOperators(source, host, carrierBucketMap());
      reason = 'role_carrier_suppressed_v2_nominal';
    } else if (lowAuxiliaryCarrierEligible) {
      transfer = transferBucketsAndOperators(source, host, carrierBucketMap());
      reason = 'role_carrier_suppressed';
    } else if (boundedPrepositionEligible) {
      transfer = transferBucketsAndOperators(source, host, [
        { from: 'other', to: 'attached_other' },
      ]);
      reason = 'role_carrier_suppressed';
    } else {
      transfer = {
        transferred_buckets: transferRoleCarrierBucketsToHost(source, host, mentionById),
        transferred_mention_ids: collectRoleBucketMentionIds(source, allRoleBuckets()),
      };
      reason = 'role_carrier_suppressed';
    }

    const hasTransferredMentions =
      Array.isArray(transfer && transfer.transferred_mention_ids) &&
      transfer.transferred_mention_ids.length > 0;
    const hasTransferredOperatorResidue =
      Array.isArray(transfer && transfer.transferred_buckets) &&
      transfer.transferred_buckets.some((x) => typeof x === 'string' && x.startsWith('operator:'));
    const sourceMentionIds = collectRoleBucketMentionIds(source, allRoleBuckets());
    const hostMentionRefs = collectAssertionMentionRefs(host);
    const sourceMentionResidue = sourceMentionIds.filter((id) => !hostMentionRefs.has(id));
    // A carrier whose mentions the host already references adds nothing and may be dropped.
    const redundantCarrier =
      (copulaEligible || nominalEligible || lowAuxiliaryCarrierEligible) &&
      sourceMentionResidue.length === 0 &&
      !hasTransferredOperatorResidue;
    if (!transfer || (!hasTransferredMentions && !hasTransferredOperatorResidue && !redundantCarrier)) {
      continue;
    }

    // Fold the source's evidence tokens (and both predicate heads) into the host.
    const hostEvidenceTokenIds = new Set((host.evidence || {}).token_ids || []);
    for (const tid of ((source.evidence || {}).token_ids || [])) hostEvidenceTokenIds.add(tid);
    hostEvidenceTokenIds.add(source.predicate.head_token_id);
    hostEvidenceTokenIds.add(host.predicate.head_token_id);
    if (!host.evidence || typeof host.evidence !== 'object') host.evidence = {};
    host.evidence.token_ids = normalizeIds(Array.from(hostEvidenceTokenIds));

    const tokenIds = new Set((source.evidence && source.evidence.token_ids) || []);
    tokenIds.add(source.predicate.head_token_id);
    tokenIds.add(host.predicate.head_token_id);
    traces.push({
      id: source.id,
      segment_id: source.segment_id,
      predicate: {
        mention_id: source.predicate.mention_id,
        head_token_id: source.predicate.head_token_id,
      },
      diagnostics: {
        predicate_quality: sourceQuality,
        suppressed_by: {
          kind: 'predicate_redirect',
          target_assertion_id: host.id,
          reason,
          evidence: {
            upstream_relation_ids: [],
            token_ids: normalizeIds(Array.from(tokenIds)),
          },
        },
      },
      suppressed_assertion_id: source.id,
      host_assertion_id: host.id,
      reason,
      predicate_class: sourceCls,
      transferred_buckets: transfer.transferred_buckets || [],
      transferred_mention_ids: transfer.transferred_mention_ids || [],
      evidence: {
        token_ids: normalizeIds(Array.from(tokenIds)),
      },
    });
    suppressedIds.add(source.id);
  }

  const kept = out.filter((a) => !suppressedIds.has(a.id));
  traces.sort((a, b) => a.id.localeCompare(b.id));
  return { assertions: kept, suppressedTraces: traces };
}
1032
+
1033
/**
 * Pick the best lexical-verb token to replace the current predicate token.
 *
 * Candidate tokens are gathered from the given relations in three priority
 * classes: 1 = `modality_unified` evidence (`chosen_predicate_token_id`),
 * 2 = `copula_frame` evidence (`verb_token_id`), 3 = the dependent token of a
 * `complement_clause` / `xcomp` relation. A candidate must be a known token in
 * the same segment as the current predicate, differ from it, and carry a
 * lexical-verb POS tag.
 *
 * @param {string} currentPredicateTokenId - Token id of the predicate to upgrade.
 * @param {Array<object>} assertionRelations - Relations to mine (non-arrays are treated as empty).
 * @param {Map<string, object>} tokenById - Token lookup keyed by token id.
 * @returns {{token_id: string, upstream_relation_ids: string[]}|null}
 *   The winning candidate (lowest priority class, then lowest token index,
 *   then token id), or `null` when there is none.
 */
function choosePredicateUpgradeCandidate(currentPredicateTokenId, assertionRelations, tokenById) {
  const anchorToken = tokenById.get(currentPredicateTokenId);
  if (!anchorToken) return null;

  const clauseLinkLabels = new Set(['complement_clause', 'xcomp']);
  const pool = new Map();

  // Record `tokenId` as a candidate, keeping the strongest (lowest) priority seen for it.
  const register = (tokenId, priority, relationId) => {
    const usableId =
      typeof tokenId === 'string' && tokenId.length > 0 && tokenId !== currentPredicateTokenId;
    if (!usableId) return;

    const token = tokenById.get(tokenId);
    if (!token || token.segment_id !== anchorToken.segment_id) return;
    if (!isLexicalVerbPos(String((token.pos || {}).tag || ''))) return;

    let entry = pool.get(tokenId);
    if (!entry) {
      entry = { token_id: tokenId, class_priority: priority, upstream_relation_ids: new Set() };
      pool.set(tokenId, entry);
    }
    if (priority < entry.class_priority) entry.class_priority = priority;
    if (typeof relationId === 'string' && relationId.length > 0) {
      entry.upstream_relation_ids.add(relationId);
    }
  };

  const relations = Array.isArray(assertionRelations) ? assertionRelations : [];
  for (const relation of relations) {
    if (!relation || typeof relation !== 'object') continue;

    const evidence = relation.evidence && typeof relation.evidence === 'object' ? relation.evidence : {};
    const relationId = typeof relation.relation_id === 'string' ? relation.relation_id : '';

    if (evidence.pattern === 'modality_unified' && typeof evidence.chosen_predicate_token_id === 'string') {
      register(evidence.chosen_predicate_token_id, 1, relationId);
    }
    if (evidence.pattern === 'copula_frame' && typeof evidence.verb_token_id === 'string') {
      register(evidence.verb_token_id, 2, relationId);
    }
    if (clauseLinkLabels.has(String(relation.label || ''))) {
      register(relation.dep_token_id, 3, relationId);
    }
  }

  // Order: priority class, then position in the segment, then token id.
  const ranked = Array.from(pool.values());
  ranked.sort((left, right) => {
    if (left.class_priority !== right.class_priority) return left.class_priority - right.class_priority;
    const leftToken = tokenById.get(left.token_id);
    const rightToken = tokenById.get(right.token_id);
    if (leftToken.i !== rightToken.i) return leftToken.i - rightToken.i;
    return left.token_id.localeCompare(right.token_id);
  });
  if (ranked.length === 0) return null;

  const [winner] = ranked;
  return {
    token_id: winner.token_id,
    upstream_relation_ids: normalizeIds(Array.from(winner.upstream_relation_ids)),
  };
}
1086
+
1087
/**
 * Move modality operators from low-quality carrier assertions onto a linked
 * lexical-verb assertion of the same segment, and drop the carrier.
 *
 * A source qualifies when its predicate quality is `low`, its predicate head
 * token is a low-quality predicate token, it has at least one `modality`
 * operator and no other operators, and its role buckets are semantically
 * empty. A target qualifies when it is not low quality, its predicate head is
 * a lexical verb, and a `complement_clause` / `xcomp` relation links the two
 * predicate mentions in either direction. The earliest target (token index,
 * token id, assertion id) wins and its `operators` array is replaced in place.
 *
 * @param {object} args
 * @param {Array<object>} args.assertions - Assertions to process.
 * @param {Iterable<object>} args.projected - Projected relations used to find clause links.
 * @param {Map<string, object>} args.mentionById - Mention lookup keyed by mention id.
 * @param {Map<string, object>} args.tokenById - Token lookup keyed by token id.
 * @returns {{assertions: Array<object>, suppressedTraces: Array<object>}}
 *   Surviving assertions sorted by segment id, predicate span start and id;
 *   traces sorted by suppressed assertion id.
 */
function mergeModalityCopulaAssertions({ assertions, projected, mentionById, tokenById }) {
  const linkLabels = new Set(['complement_clause', 'xcomp']);
  const suppressedById = new Set();
  const suppressedTraces = [];

  // Filter the clause-link relations once instead of once per source/target pair.
  const linkRelations = [];
  for (const rel of projected || []) {
    if (rel && linkLabels.has(String(rel.label || ''))) linkRelations.push(rel);
  }

  // Ids of clause-link relations connecting the two mentions, in either direction.
  function findLinkRelationIds(fromMentionId, toMentionId) {
    const out = [];
    for (const rel of linkRelations) {
      const direct = rel.head_mention_id === fromMentionId && rel.dep_mention_id === toMentionId;
      const reverse = rel.head_mention_id === toMentionId && rel.dep_mention_id === fromMentionId;
      if (!direct && !reverse) continue;
      if (typeof rel.relation_id === 'string' && rel.relation_id.length > 0) out.push(rel.relation_id);
    }
    return normalizeIds(out);
  }

  function modalityOperators(a) {
    return (Array.isArray(a && a.operators) ? a.operators : []).filter((op) => op && op.kind === 'modality');
  }

  function nonModalityOperators(a) {
    return (Array.isArray(a && a.operators) ? a.operators : []).filter((op) => op && op.kind !== 'modality');
  }

  const candidates = (assertions || []).filter((a) => {
    if (!a || !a.predicate || typeof a.predicate.mention_id !== 'string') return false;
    const q = (((a || {}).diagnostics || {}).predicate_quality) || '';
    if (q !== 'low') return false;
    const predMention = mentionById.get(a.predicate.mention_id);
    const predTok = predMention ? tokenById.get(predMention.head_token_id) : null;
    if (!isLowQualityPredicateToken(predTok)) return false;
    if (modalityOperators(a).length === 0) return false;
    if (nonModalityOperators(a).length > 0) return false;
    if (!roleBucketsAreSemanticallyEmpty(assertionRoleBuckets(a))) return false;
    return true;
  }).sort((a, b) => a.id.localeCompare(b.id));

  for (const source of candidates) {
    if (suppressedById.has(source.id)) continue;
    const sourcePredMention = mentionById.get(source.predicate.mention_id);
    const sourceHeadTok = sourcePredMention ? tokenById.get(sourcePredMention.head_token_id) : null;
    if (!sourcePredMention || !sourceHeadTok) continue;

    const targetCandidates = [];
    for (const target of assertions || []) {
      if (!target || target.id === source.id) continue;
      if (suppressedById.has(target.id)) continue;
      if (target.segment_id !== source.segment_id) continue;
      const tq = (((target || {}).diagnostics || {}).predicate_quality) || '';
      if (tq === 'low') continue;
      const targetMention = mentionById.get(target.predicate.mention_id);
      const targetTok = targetMention ? tokenById.get(targetMention.head_token_id) : null;
      const targetTag = String((((targetTok || {}).pos) || {}).tag || '');
      if (!targetTok || !isLexicalVerbPos(targetTag)) continue;
      const linkageIds = findLinkRelationIds(source.predicate.mention_id, target.predicate.mention_id);
      if (linkageIds.length === 0) continue;
      targetCandidates.push({
        target,
        targetMention,
        targetTok,
        linkageIds,
      });
    }

    // Earliest target in the segment wins; ids keep the choice deterministic.
    targetCandidates.sort((a, b) => {
      if (a.targetTok.i !== b.targetTok.i) return a.targetTok.i - b.targetTok.i;
      if (a.targetTok.id !== b.targetTok.id) return a.targetTok.id.localeCompare(b.targetTok.id);
      return a.target.id.localeCompare(b.target.id);
    });
    if (targetCandidates.length === 0) continue;
    const chosen = targetCandidates[0];
    const sourceModalityOps = modalityOperators(source);

    // Target operators first, then the source's modality operators merged on top.
    const mergedOps = new Map();
    for (const op of chosen.target.operators || []) {
      mergeOperator(mergedOps, op);
    }
    for (const op of sourceModalityOps) {
      mergeOperator(mergedOps, op);
    }
    chosen.target.operators = Array.from(mergedOps.values())
      .map((op) => ({ ...op, evidence: dedupeAndSortEvidence(op.evidence || []) }))
      .sort((a, b) => {
        if (a.kind !== b.kind) return a.kind.localeCompare(b.kind);
        if ((a.value || '') !== (b.value || '')) return (a.value || '').localeCompare(b.value || '');
        if ((a.group_id || '') !== (b.group_id || '')) return (a.group_id || '').localeCompare(b.group_id || '');
        if ((a.token_id || '') !== (b.token_id || '')) return (a.token_id || '').localeCompare(b.token_id || '');
        return (a.role || '').localeCompare(b.role || '');
      });

    // Trace evidence: both predicate heads plus every token the modality evidence points to.
    const tokenIds = new Set([
      source.predicate.head_token_id,
      chosen.target.predicate.head_token_id,
    ]);
    for (const op of sourceModalityOps) {
      for (const ev of op.evidence || []) {
        if (ev && typeof ev.to_token_id === 'string') tokenIds.add(ev.to_token_id);
      }
    }
    suppressedById.add(source.id);
    suppressedTraces.push({
      id: source.id,
      segment_id: source.segment_id,
      predicate: {
        mention_id: source.predicate.mention_id,
        head_token_id: source.predicate.head_token_id,
      },
      diagnostics: {
        predicate_quality: (((source || {}).diagnostics || {}).predicate_quality) || 'low',
        suppressed_by: {
          kind: 'predicate_redirect',
          target_assertion_id: chosen.target.id,
          reason: 'modality_moved_to_lexical',
          evidence: {
            upstream_relation_ids: chosen.linkageIds,
            token_ids: normalizeIds(Array.from(tokenIds)),
          },
        },
      },
    });
  }

  const kept = (assertions || []).filter((a) => !suppressedById.has(a.id));
  kept.sort((a, b) => {
    if (a.segment_id !== b.segment_id) return a.segment_id.localeCompare(b.segment_id);
    const pa = tokenById.get(a.predicate.head_token_id);
    const pb = tokenById.get(b.predicate.head_token_id);
    if (pa.span.start !== pb.span.start) return pa.span.start - pb.span.start;
    return a.id.localeCompare(b.id);
  });
  suppressedTraces.sort((a, b) => a.id.localeCompare(b.id));

  return { assertions: kept, suppressedTraces };
}
1222
+
1223
+ function buildAssertions({ projected, mentionById, tokenById }) {
1224
+ const byPredicate = new Map();
1225
+ for (const p of projected) {
1226
+ if (!byPredicate.has(p.head_mention_id)) byPredicate.set(p.head_mention_id, []);
1227
+ byPredicate.get(p.head_mention_id).push(p);
1228
+ }
1229
+ const coordGroups = buildCoordinationGroups(projected);
1230
+ const coordEvidenceByMention = new Map();
1231
+ for (const p of projected) {
1232
+ if (p.label !== 'coordination') continue;
1233
+ const evidenceItem = {
1234
+ annotation_id: p.relation_id || 'r:unknown',
1235
+ from_token_id: p.head_token_id,
1236
+ to_token_id: p.dep_token_id,
1237
+ label: p.label,
1238
+ };
1239
+ for (const mentionId of [p.head_mention_id, p.dep_mention_id]) {
1240
+ if (!coordEvidenceByMention.has(mentionId)) coordEvidenceByMention.set(mentionId, []);
1241
+ coordEvidenceByMention.get(mentionId).push(evidenceItem);
1242
+ }
1243
+ }
1244
+
1245
+ const assertions = [];
1246
+ const suppressedAssertions = [];
1247
+ const coveredMentions = new Set();
1248
+ const tokenMentionIds = new Map();
1249
+ for (const m of mentionById.values()) {
1250
+ for (const tid of m.token_ids || []) {
1251
+ if (!tokenMentionIds.has(tid)) tokenMentionIds.set(tid, []);
1252
+ tokenMentionIds.get(tid).push(m.id);
1253
+ }
1254
+ }
1255
+ for (const ids of tokenMentionIds.values()) ids.sort((a, b) => a.localeCompare(b));
1256
+
1257
+ const bySegment = new Map();
1258
+ for (const t of tokenById.values()) {
1259
+ if (!bySegment.has(t.segment_id)) bySegment.set(t.segment_id, []);
1260
+ bySegment.get(t.segment_id).push(t);
1261
+ }
1262
+ for (const arr of bySegment.values()) arr.sort((a, b) => a.i - b.i);
1263
+
1264
+ const primaryMentionsBySegment = new Map();
1265
+ for (const m of mentionById.values()) {
1266
+ if (!m || !m.is_primary) continue;
1267
+ if (!primaryMentionsBySegment.has(m.segment_id)) primaryMentionsBySegment.set(m.segment_id, []);
1268
+ primaryMentionsBySegment.get(m.segment_id).push(m);
1269
+ }
1270
+ for (const arr of primaryMentionsBySegment.values()) {
1271
+ arr.sort((a, b) => {
1272
+ if (a.span.start !== b.span.start) return a.span.start - b.span.start;
1273
+ if (a.span.end !== b.span.end) return a.span.end - b.span.end;
1274
+ return a.id.localeCompare(b.id);
1275
+ });
1276
+ }
1277
+
1278
+ const projectedMentionIds = new Set();
1279
+ for (const p of projected) {
1280
+ projectedMentionIds.add(p.head_mention_id);
1281
+ projectedMentionIds.add(p.dep_mention_id);
1282
+ }
1283
+
1284
+ const coordEdges = projected.filter((p) => p.label === 'coordination');
1285
+ const coordEvidenceByGroup = new Map();
1286
+ for (const edge of coordEdges) {
1287
+ const gid = coordGroups.get(edge.head_mention_id) || coordGroups.get(edge.dep_mention_id);
1288
+ if (!gid) continue;
1289
+ if (!coordEvidenceByGroup.has(gid)) coordEvidenceByGroup.set(gid, []);
1290
+ coordEvidenceByGroup.get(gid).push({
1291
+ annotation_id: edge.relation_id || 'r:unknown',
1292
+ from_token_id: edge.head_token_id,
1293
+ to_token_id: edge.dep_token_id,
1294
+ label: edge.label,
1295
+ });
1296
+ }
1297
+
1298
/**
 * Smallest token index (`i`) among the tokens of a mention.
 *
 * Token ids that do not resolve in `tokenById` are ignored. When the mention
 * is missing, has no `token_ids` array, or none of its ids resolve, the
 * sentinel `Number.MAX_SAFE_INTEGER` is returned.
 *
 * @param {{ token_ids?: string[] } | null | undefined} mention
 * @returns {number} Minimum `i`, or `Number.MAX_SAFE_INTEGER` when no token
 *   resolves. A resolved token whose `i` is not numeric makes the result NaN
 *   (same as `Math.min` over the whole list).
 */
function mentionStartI(mention) {
  const tokenIds = mention && Array.isArray(mention.token_ids) ? mention.token_ids : [];
  let resolvedCount = 0;
  // Infinity is the identity for Math.min; NaN still propagates through it.
  let earliest = Infinity;
  for (const tokenId of tokenIds) {
    const token = tokenById.get(tokenId);
    if (!token) continue;
    resolvedCount += 1;
    earliest = Math.min(earliest, token.i);
  }
  return resolvedCount === 0 ? Number.MAX_SAFE_INTEGER : earliest;
}
1304
+
1305
/**
 * Reports whether any token of the mention carries a POS tag that
 * `isVerbPosTag` accepts.
 *
 * Token ids are resolved through `tokenById`; an unresolved id or a token
 * without `pos` is passed to `isVerbPosTag` as `undefined`.
 *
 * @param {{ token_ids?: string[] } | null | undefined} mention
 * @returns {boolean} `false` when the mention has no `token_ids` array.
 */
function mentionHasVerbToken(mention) {
  if (!mention || !Array.isArray(mention.token_ids)) return false;
  for (const tokenId of mention.token_ids) {
    const token = tokenById.get(tokenId) || {};
    const pos = token.pos || {};
    if (isVerbPosTag(pos.tag)) return true;
  }
  return false;
}
1309
+
1310
/**
 * Pick a fallback theme mention for a predicate token from the primary
 * mentions of the predicate's segment.
 *
 * A mention is eligible when it
 *   - is not listed in `existingIds` and is not the predicate's own
 *     single-token mention (`m:<segment>:<start>-<end>:token`),
 *   - has a span starting at or after the end of the predicate's span,
 *   - starts (per `mentionStartI`) no more than 8 token positions after the
 *     predicate, and
 *   - is of kind `mwe` or has a head token whose POS tag satisfies
 *     `isNounLikePosTag`.
 *
 * Eligible mentions are ranked by: fewest tokens, then smallest token distance
 * to the predicate, then earliest span start, then mention id.
 *
 * The start index, token count and distance of each candidate are computed
 * once while filtering instead of being recomputed inside the sort comparator,
 * and the predicate's own mention id is built once instead of per mention.
 *
 * @param {{ segment_id: string, i: number, span: { start: number, end: number } }} predTok
 *   Predicate head token.
 * @param {Iterable<string> | null | undefined} existingIds
 *   Mention ids already used by other role buckets.
 * @returns {object | null} The best-ranked mention, or `null` when none qualifies.
 */
function chooseThemeMentionForPredicateToken(predTok, existingIds) {
  const segMentions = primaryMentionsBySegment.get(predTok.segment_id) || [];
  const used = new Set(existingIds || []);
  const unused = segMentions.filter((m) => !used.has(m.id));
  if (unused.length === 0) return null;

  // Loop-invariant: id of the predicate's own single-token mention.
  const predTokenMentionId = `m:${predTok.segment_id}:${predTok.span.start}-${predTok.span.end}:token`;

  const ranked = [];
  for (const m of unused) {
    if (m.id === predTokenMentionId) continue;
    if (!m.span || m.span.start < predTok.span.end) continue;
    const startI = mentionStartI(m);
    if (!Number.isFinite(startI) || startI - predTok.i > 8) continue;
    const headTok = tokenById.get(m.head_token_id);
    const headTag = String((((headTok || {}).pos) || {}).tag || '');
    if (m.kind !== 'mwe' && !isNounLikePosTag(headTag)) continue;
    ranked.push({
      mention: m,
      spanLen: Array.isArray(m.token_ids) ? m.token_ids.length : Number.MAX_SAFE_INTEGER,
      distance: Math.abs(startI - predTok.i),
    });
  }

  ranked.sort((a, b) => {
    if (a.spanLen !== b.spanLen) return a.spanLen - b.spanLen;
    if (a.distance !== b.distance) return a.distance - b.distance;
    if (a.mention.span.start !== b.mention.span.start) return a.mention.span.start - b.mention.span.start;
    return a.mention.id.localeCompare(b.mention.id);
  });
  return ranked.length > 0 ? ranked[0].mention : null;
}
1335
+
1336
+ const spatialPrepMap = new Set(['in', 'into', 'on', 'onto', 'at', 'to', 'from', 'inside', 'within', 'under', 'over', 'near']);
1337
/**
 * Pick a fallback location mention for a predicate token.
 *
 * Scans up to 8 tokens following the predicate in its segment for a token
 * tagged `IN` whose lower-cased surface is in `spatialPrepMap`. For the first
 * such preposition that has at least one eligible mention, the best-ranked
 * mention is returned. A primary mention of the segment is eligible when it
 *   - is not listed in `existingIds`,
 *   - starts (per `mentionStartI`) 1 to 5 token positions after the
 *     preposition, and
 *   - is of kind `mwe` or has a head token whose POS tag satisfies
 *     `isNounLikePosTag`.
 *
 * Eligible mentions are ranked by: fewest tokens, then smallest offset from
 * the preposition, then mention id.
 *
 * Compared with the previous version, the segment's mention list is looked up
 * once rather than per scanned token, each mention is scored once rather than
 * inside the sort comparator, and nullish token slots are skipped consistently
 * instead of throwing on `t.surface`.
 *
 * @param {{ id: string, segment_id: string }} predTok Predicate head token.
 * @param {Iterable<string> | null | undefined} existingIds
 *   Mention ids already used by other role buckets.
 * @returns {object | null} The chosen mention, or `null` when none qualifies.
 */
function chooseLocationMentionForPredicateToken(predTok, existingIds) {
  const segToks = bySegment.get(predTok.segment_id) || [];
  const idx = segToks.findIndex((t) => t && t.id === predTok.id);
  if (idx < 0) return null;
  const used = new Set(existingIds || []);
  const windowEnd = Math.min(segToks.length, idx + 9);

  // Scored lazily: most predicates have no spatial preposition in the window.
  let pool = null;
  const buildPool = () => {
    const scored = [];
    for (const m of primaryMentionsBySegment.get(predTok.segment_id) || []) {
      if (used.has(m.id)) continue;
      const startI = mentionStartI(m);
      if (!Number.isFinite(startI)) continue;
      const headTok = tokenById.get(m.head_token_id);
      const headTag = String((((headTok || {}).pos) || {}).tag || '');
      if (m.kind !== 'mwe' && !isNounLikePosTag(headTag)) continue;
      scored.push({
        mention: m,
        startI,
        spanLen: Array.isArray(m.token_ids) ? m.token_ids.length : Number.MAX_SAFE_INTEGER,
      });
    }
    return scored;
  };

  for (let i = idx + 1; i < windowEnd; i += 1) {
    const t = segToks[i];
    if (!t) continue;
    const surface = String(t.surface || '').toLowerCase();
    const tag = String((t.pos || {}).tag || '');
    if (tag !== 'IN' || !spatialPrepMap.has(surface)) continue;
    if (pool === null) pool = buildPool();
    // Negated form keeps the original comparison semantics for non-numeric t.i.
    const cands = pool.filter((c) => !(c.startI <= t.i || c.startI - t.i > 5));
    cands.sort((a, b) => {
      if (a.spanLen !== b.spanLen) return a.spanLen - b.spanLen;
      const da = a.startI - t.i;
      const db = b.startI - t.i;
      if (da !== db) return da - db;
      return a.mention.id.localeCompare(b.mention.id);
    });
    if (cands.length > 0) return cands[0].mention;
  }
  return null;
}
1369
+
1370
/**
 * Tests whether the mention registered under `mentionId` in `mentionById`
 * shares at least one token id with `tokenIdSet`.
 *
 * @param {string | null | undefined} mentionId
 * @param {Set<string> | null | undefined} tokenIdSet
 * @returns {boolean} `false` when either argument is missing, the set is
 *   empty, the mention is unknown, or the mention has no `token_ids` array.
 */
function mentionOverlapsTokenSet(mentionId, tokenIdSet) {
  const hasInputs = Boolean(mentionId) && Boolean(tokenIdSet) && tokenIdSet.size !== 0;
  if (!hasInputs) return false;
  const mention = mentionById.get(mentionId);
  const ownTokenIds = mention && Array.isArray(mention.token_ids) ? mention.token_ids : [];
  for (const tokenId of ownTokenIds) {
    if (tokenIdSet.has(tokenId)) return true;
  }
  return false;
}
1376
+
1377
/**
 * Resolve a token to a mention by delegating to `chooseBestMentionForToken`.
 *
 * The candidate list is the set of mention ids recorded for `tokenId` in
 * `tokenMentionIds`; when `excludeTokenIds` is a non-empty set, candidates
 * whose tokens overlap that set are dropped first.
 *
 * @param {string} tokenId Token to resolve.
 * @param {string} segmentId Segment passed through to the chooser.
 * @param {string | null | undefined} excludeMentionId Forwarded as `null` when falsy.
 * @param {Set<string> | null | undefined} excludeTokenIds Tokens candidates must not cover.
 * @param {boolean | undefined} preferMinimalSpan Forwarded as `true` only when strictly `true`.
 * @returns {*} Whatever `chooseBestMentionForToken` returns (callers in this
 *   file read `mention_id`, `candidate_count` and `chosen_was_first` from it).
 */
function chooseMentionForToken(tokenId, segmentId, excludeMentionId, excludeTokenIds, preferMinimalSpan) {
  const mentionIdsForToken = tokenMentionIds.get(tokenId) || [];
  const hasExclusions = Boolean(excludeTokenIds) && excludeTokenIds.size !== 0;
  // Always hand the chooser a fresh array, never the shared index entry.
  const candidateMentionIds = hasExclusions
    ? mentionIdsForToken.filter((mentionId) => !mentionOverlapsTokenSet(mentionId, excludeTokenIds))
    : mentionIdsForToken.slice();
  const request = {
    tokenId,
    segmentId,
    mentionById,
    candidateMentionIds,
    excludeMentionId: excludeMentionId ? excludeMentionId : null,
    preferMinimalSpan: preferMinimalSpan === true,
  };
  return chooseBestMentionForToken(request);
}
1391
+
1392
+ const predicateIds = Array.from(byPredicate.keys()).sort((a, b) => {
1393
+ const ma = mentionById.get(a);
1394
+ const mb = mentionById.get(b);
1395
+ if (ma.segment_id !== mb.segment_id) return ma.segment_id.localeCompare(mb.segment_id);
1396
+ if (ma.span.start !== mb.span.start) return ma.span.start - mb.span.start;
1397
+ return a.localeCompare(b);
1398
+ });
1399
+
1400
+ for (const predId of predicateIds) {
1401
+ const originalPredMention = mentionById.get(predId);
1402
+ if (!originalPredMention) continue;
1403
+ const rels = byPredicate.get(predId) || [];
1404
+ rels.sort((a, b) => {
1405
+ const ta = tokenById.get(a.dep_token_id);
1406
+ const tb = tokenById.get(b.dep_token_id);
1407
+ if (ta.span.start !== tb.span.start) return ta.span.start - tb.span.start;
1408
+ if (a.label !== b.label) return a.label.localeCompare(b.label);
1409
+ return a.relation_id.localeCompare(b.relation_id);
1410
+ });
1411
+
1412
+ let effectivePredMention = originalPredMention;
1413
+ let effectivePredId = predId;
1414
+ let suppressionPointer = null;
1415
+ let predicateUpgradeApplied = false;
1416
+ const originalPredTok = tokenById.get(originalPredMention.head_token_id);
1417
+ const originalPredicateClass = classifyPredicateClass(originalPredTok);
1418
+ if (
1419
+ isLowQualityPredicateToken(originalPredTok) ||
1420
+ originalPredicateClass === 'preposition' ||
1421
+ originalPredicateClass === 'nominal_head'
1422
+ ) {
1423
+ const upgrade = choosePredicateUpgradeCandidate(originalPredMention.head_token_id, rels, tokenById);
1424
+ if (upgrade && typeof upgrade.token_id === 'string') {
1425
+ const upgradeMentionId = chooseMentionForToken(
1426
+ upgrade.token_id,
1427
+ originalPredMention.segment_id,
1428
+ null,
1429
+ null,
1430
+ false
1431
+ ).mention_id;
1432
+ const upgradeMention = upgradeMentionId ? mentionById.get(upgradeMentionId) : null;
1433
+ if (upgradeMention) {
1434
+ effectivePredMention = upgradeMention;
1435
+ effectivePredId = upgradeMention.id;
1436
+ predicateUpgradeApplied = true;
1437
+ suppressionPointer = {
1438
+ kind: 'predicate_redirect',
1439
+ reason: 'predicate_upgraded_to_lexical',
1440
+ upstream_relation_ids: upgrade.upstream_relation_ids || [],
1441
+ };
1442
+ }
1443
+ }
1444
+ }
1445
+ if (originalPredicateClass === 'preposition' && !predicateUpgradeApplied) {
1446
+ continue;
1447
+ }
1448
+ if (isMakeSureScaffoldPredicate({ predTok: originalPredTok, projected, tokensBySegment: bySegment })) {
1449
+ coveredMentions.add(predId);
1450
+ continue;
1451
+ }
1452
+
1453
+ const roleBuckets = {
1454
+ actor: [],
1455
+ theme: [],
1456
+ attr: [],
1457
+ topic: [],
1458
+ location: [],
1459
+ other: [],
1460
+ };
1461
+ const otherRoleMap = new Map();
1462
+ const operatorsByKey = new Map();
1463
+ const evidenceItems = [];
1464
+ const evidenceTokenIds = new Set(effectivePredMention.token_ids);
1465
+ const effectivePredTokForOverlap = tokenById.get(effectivePredMention.head_token_id);
1466
+ const effectivePredTagForOverlap = String((((effectivePredTokForOverlap || {}).pos) || {}).tag || '');
1467
+ const predicateTokenIdSet = isVerbPosTag(effectivePredTagForOverlap)
1468
+ ? new Set(effectivePredMention.token_ids || [])
1469
+ : new Set();
1470
+ const applyStrictThemeClauseGate = effectivePredTagForOverlap === 'VBN';
1471
+ let bucketProjectionChoice = null;
1472
+ const segTokensForClause = bySegment.get(effectivePredMention.segment_id) || [];
1473
+ const predIdxForClause = segTokensForClause.findIndex((t) => t && t.id === effectivePredMention.head_token_id);
1474
+ let clauseLeftI = Number.NEGATIVE_INFINITY;
1475
+ let clauseRightI = Number.POSITIVE_INFINITY;
1476
+ if (predIdxForClause >= 0) {
1477
+ let left = predIdxForClause;
1478
+ let right = predIdxForClause;
1479
+ while (left - 1 >= 0 && !isClauseBoundaryToken(segTokensForClause[left - 1])) left -= 1;
1480
+ while (right + 1 < segTokensForClause.length && !isClauseBoundaryToken(segTokensForClause[right + 1])) right += 1;
1481
+ clauseLeftI = Number(segTokensForClause[left].i);
1482
+ clauseRightI = Number(segTokensForClause[right].i);
1483
+ }
1484
/**
 * Checks that no token of the mention lies outside the current predicate's
 * clause window `[clauseLeftI, clauseRightI]` (token `i` positions).
 *
 * Tokens that do not resolve in `tokenById`, or whose `i` is not a number,
 * are ignored. An unknown mention, or one without a non-empty `token_ids`
 * array, is treated as inside.
 *
 * @param {string} mentionId
 * @returns {boolean} `false` only when some token has a numeric `i` below
 *   `clauseLeftI` or above `clauseRightI`.
 */
function mentionInsidePredicateClause(mentionId) {
  const mention = mentionById.get(mentionId);
  const tokenIds = mention && Array.isArray(mention.token_ids) ? mention.token_ids : [];
  return tokenIds.every((tokenId) => {
    const token = tokenById.get(tokenId);
    const position = token ? token.i : undefined;
    if (typeof position !== 'number') return true;
    // Written as a negated "outside" test so NaN counts as inside.
    return !(position < clauseLeftI || position > clauseRightI);
  });
}
1494
+
1495
+ for (const r of rels) {
1496
+ const evidenceItem = {
1497
+ annotation_id: r.relation_id || 'r:unknown',
1498
+ from_token_id: r.head_token_id,
1499
+ to_token_id: r.dep_token_id,
1500
+ label: r.label,
1501
+ };
1502
+ evidenceItems.push(evidenceItem);
1503
+ evidenceTokenIds.add(r.dep_token_id);
1504
+ const depMentionPick = chooseMentionForToken(
1505
+ r.dep_token_id,
1506
+ effectivePredMention.segment_id,
1507
+ effectivePredId,
1508
+ predicateTokenIdSet,
1509
+ true
1510
+ );
1511
+ const depMentionIdFromPick = depMentionPick && typeof depMentionPick.mention_id === 'string'
1512
+ ? depMentionPick.mention_id
1513
+ : null;
1514
+ const depMentionIdFromRelation = typeof r.dep_mention_id === 'string' ? r.dep_mention_id : null;
1515
+ let depMentionIdForSlot = depMentionIdFromPick || depMentionIdFromRelation;
1516
+ if (mentionOverlapsTokenSet(depMentionIdForSlot, predicateTokenIdSet)) {
1517
+ depMentionIdForSlot = null;
1518
+ }
1519
+ const depMention = depMentionIdForSlot ? mentionById.get(depMentionIdForSlot) : null;
1520
+ if (depMention) {
1521
+ for (const tid of depMention.token_ids) evidenceTokenIds.add(tid);
1522
+ }
1523
+
1524
+ if (r.label === 'modality') {
1525
+ const t = tokenById.get(r.dep_token_id);
1526
+ mergeOperator(operatorsByKey, {
1527
+ kind: 'modality',
1528
+ value: t ? t.surface : '',
1529
+ evidence: [evidenceItem],
1530
+ });
1531
+ if (depMentionIdForSlot) coveredMentions.add(depMentionIdForSlot);
1532
+ continue;
1533
+ }
1534
+ if (r.label === 'negation') {
1535
+ mergeOperator(operatorsByKey, {
1536
+ kind: 'negation',
1537
+ token_id: r.dep_token_id,
1538
+ evidence: [evidenceItem],
1539
+ });
1540
+ if (depMentionIdForSlot) coveredMentions.add(depMentionIdForSlot);
1541
+ continue;
1542
+ }
1543
+ if (r.label === 'coordination') {
1544
+ const gid = coordGroups.get(effectivePredId) || `cg:${sha256Hex(`${effectivePredId}|${depMentionIdForSlot}`).slice(0, 12)}`;
1545
+ const coordType = r.evidence && (
1546
+ r.evidence.coord_type ||
1547
+ r.evidence.coordination_type ||
1548
+ r.evidence.coordinator_type
1549
+ );
1550
+ mergeOperator(operatorsByKey, {
1551
+ kind: 'coordination_group',
1552
+ group_id: gid,
1553
+ value: typeof coordType === 'string' && coordType.length > 0 ? String(coordType).toLowerCase() : undefined,
1554
+ evidence: [evidenceItem],
1555
+ });
1556
+ if (depMentionIdForSlot) coveredMentions.add(depMentionIdForSlot);
1557
+ continue;
1558
+ }
1559
+ if (isCompareLabel(r.label)) {
1560
+ mergeOperator(operatorsByKey, {
1561
+ kind: r.label,
1562
+ token_id: r.dep_token_id,
1563
+ evidence: [evidenceItem],
1564
+ });
1565
+ if (depMentionIdForSlot) coveredMentions.add(depMentionIdForSlot);
1566
+ continue;
1567
+ }
1568
+ if (isQuantifierLabel(r.label)) {
1569
+ const depTok = tokenById.get(r.dep_token_id);
1570
+ mergeOperator(operatorsByKey, {
1571
+ kind: 'quantifier',
1572
+ token_id: r.dep_token_id,
1573
+ value: depTok && typeof depTok.surface === 'string' ? depTok.surface.toLowerCase() : '',
1574
+ evidence: [evidenceItem],
1575
+ });
1576
+ if (depMentionIdForSlot) coveredMentions.add(depMentionIdForSlot);
1577
+ continue;
1578
+ }
1579
+ if (r.label === 'complement_clause' || (r.evidence && r.evidence.pattern === 'control_inherit_subject')) {
1580
+ mergeOperator(operatorsByKey, {
1581
+ kind: 'control_inherit_subject',
1582
+ evidence: [evidenceItem],
1583
+ });
1584
+ }
1585
+ if (r.label === 'purpose' || (r.evidence && r.evidence.pattern === 'control_propagation')) {
1586
+ mergeOperator(operatorsByKey, {
1587
+ kind: 'control_propagation',
1588
+ evidence: [evidenceItem],
1589
+ });
1590
+ }
1591
+
1592
+ const map = roleToSlot(r.label);
1593
+ if (
1594
+ depMentionPick.candidate_count >= 2 &&
1595
+ depMentionIdForSlot &&
1596
+ depMentionPick.chosen_was_first === false &&
1597
+ !bucketProjectionChoice
1598
+ ) {
1599
+ bucketProjectionChoice = {
1600
+ candidate_count: depMentionPick.candidate_count,
1601
+ chosen_mention_id: depMentionIdForSlot,
1602
+ };
1603
+ }
1604
+ if (!depMentionIdForSlot) {
1605
+ continue;
1606
+ }
1607
+ if (map.slot === 'theme' && applyStrictThemeClauseGate && !mentionInsidePredicateClause(depMentionIdForSlot)) {
1608
+ continue;
1609
+ }
1610
+ if (map.slot === 'other') {
1611
+ const key = map.role || r.label;
1612
+ if (!otherRoleMap.has(key)) otherRoleMap.set(key, new Set());
1613
+ otherRoleMap.get(key).add(depMentionIdForSlot);
1614
+ } else {
1615
+ roleBuckets[map.slot].push(depMentionIdForSlot);
1616
+ }
1617
+ coveredMentions.add(depMentionIdForSlot);
1618
+ }
1619
+
1620
+ if (coordGroups.has(effectivePredId)) {
1621
+ mergeOperator(operatorsByKey, {
1622
+ kind: 'coordination_group',
1623
+ group_id: coordGroups.get(effectivePredId),
1624
+ evidence: dedupeAndSortEvidence(coordEvidenceByMention.get(effectivePredId) || []),
1625
+ });
1626
+ }
1627
+
1628
+ roleBuckets.actor = normalizeIds(roleBuckets.actor);
1629
+ roleBuckets.theme = normalizeIds(roleBuckets.theme);
1630
+ roleBuckets.attr = normalizeIds(roleBuckets.attr);
1631
+ roleBuckets.topic = normalizeIds(roleBuckets.topic);
1632
+ roleBuckets.location = normalizeIds(roleBuckets.location);
1633
+ roleBuckets.other = Array.from(otherRoleMap.entries())
1634
+ .sort((a, b) => a[0].localeCompare(b[0]))
1635
+ .map(([role, ids]) => ({ role, mention_ids: normalizeIds(Array.from(ids)) }));
1636
+
1637
+ const copulaSet = new Set(['is', 'are', 'was', 'were', 'be', 'been', 'being']);
1638
+ if (roleBuckets.attr.length === 0 && roleBuckets.theme.length === 0) {
1639
+ const predTok = tokenById.get(effectivePredMention.head_token_id);
1640
+ const predSurface = String(predTok && predTok.surface ? predTok.surface : '').toLowerCase();
1641
+ if (predTok && copulaSet.has(predSurface)) {
1642
+ const segTokens = bySegment.get(predTok.segment_id) || [];
1643
+ const idx = segTokens.findIndex((t) => t.id === predTok.id);
1644
+ if (idx >= 0) {
1645
+ const lookahead = segTokens.slice(idx + 1, idx + 4);
1646
+ const jj = lookahead.find((t) => /^(JJ|JJR|JJS)$/.test(String((t.pos || {}).tag || '')));
1647
+ if (jj) {
1648
+ const midPick = chooseMentionForToken(
1649
+ jj.id,
1650
+ effectivePredMention.segment_id,
1651
+ effectivePredId,
1652
+ predicateTokenIdSet,
1653
+ true
1654
+ );
1655
+ const mid = midPick && typeof midPick.mention_id === 'string' ? midPick.mention_id : null;
1656
+ if (mid) {
1657
+ roleBuckets.attr.push(mid);
1658
+ coveredMentions.add(mid);
1659
+ }
1660
+ }
1661
+ }
1662
+ }
1663
+ }
1664
+
1665
+ {
1666
+ const predTok = tokenById.get(effectivePredMention.head_token_id);
1667
+ const predSurface = String(predTok && predTok.surface ? predTok.surface : '').toLowerCase();
1668
+ const predTag = String((((predTok || {}).pos) || {}).tag || '');
1669
+ if (predTok && isVerbPosTag(predTag) && !isCopulaSurface(predSurface)) {
1670
+ if (roleBuckets.theme.length === 0) {
1671
+ const theme = chooseThemeMentionForPredicateToken(predTok, roleBuckets.actor.concat(roleBuckets.attr, roleBuckets.topic, roleBuckets.location));
1672
+ if (theme && projectedMentionIds.has(theme.id)) {
1673
+ roleBuckets.theme = normalizeIds(roleBuckets.theme.concat([theme.id]));
1674
+ coveredMentions.add(theme.id);
1675
+ }
1676
+ }
1677
+ if (roleBuckets.location.length === 0) {
1678
+ const location = chooseLocationMentionForPredicateToken(predTok, roleBuckets.actor.concat(roleBuckets.theme, roleBuckets.attr, roleBuckets.topic));
1679
+ if (location && projectedMentionIds.has(location.id)) {
1680
+ roleBuckets.location = normalizeIds(roleBuckets.location.concat([location.id]));
1681
+ coveredMentions.add(location.id);
1682
+ }
1683
+ }
1684
+ }
1685
+ }
1686
+ const predTokForDiagnostics = tokenById.get(effectivePredMention.head_token_id);
1687
+ const workingAssertion = {
1688
+ predicate: { mention_id: effectivePredId },
1689
+ diagnostics: { predicate_quality: isLowQualityPredicateToken(predTokForDiagnostics) ? 'low' : 'ok' },
1690
+ };
1691
+ applyRoleBucketsToAssertion(workingAssertion, {
1692
+ actor: roleBuckets.actor.slice(),
1693
+ theme: roleBuckets.theme.slice(),
1694
+ attr: roleBuckets.attr.slice(),
1695
+ topic: roleBuckets.topic.slice(),
1696
+ location: roleBuckets.location.slice(),
1697
+ other: roleBuckets.other.map((o) => ({ role: o.role, mention_ids: (o.mention_ids || []).slice() })),
1698
+ }, mentionById);
1699
+ enforceCoreBucketTokenDisjointness(workingAssertion, mentionById, tokenById);
1700
+ pruneLowCopulaBuckets(workingAssertion, mentionById, tokenById, bySegment);
1701
+ trimCatchAllThemeBuckets(workingAssertion, mentionById, tokenById, bySegment);
1702
+ const workingSlots = assertionRoleBuckets(workingAssertion);
1703
+ roleBuckets.actor = normalizeIds(workingSlots.actor || []);
1704
+ roleBuckets.theme = normalizeIds(workingSlots.theme || []);
1705
+ roleBuckets.attr = normalizeIds(workingSlots.attr || []);
1706
+ roleBuckets.topic = normalizeIds(workingSlots.topic || []);
1707
+ roleBuckets.location = normalizeIds(workingSlots.location || []);
1708
+ roleBuckets.other = Array.isArray(workingSlots.other)
1709
+ ? workingSlots.other.map((o) => ({ role: o.role, mention_ids: normalizeIds(o.mention_ids || []) }))
1710
+ : [];
1711
+ const dedupOps = Array.from(operatorsByKey.values())
1712
+ .map((op) => ({
1713
+ ...op,
1714
+ evidence: dedupeAndSortEvidence(op.evidence || []),
1715
+ }))
1716
+ .sort((a, b) => {
1717
+ if (a.kind !== b.kind) return a.kind.localeCompare(b.kind);
1718
+ if ((a.value || '') !== (b.value || '')) return (a.value || '').localeCompare(b.value || '');
1719
+ if ((a.group_id || '') !== (b.group_id || '')) return (a.group_id || '').localeCompare(b.group_id || '');
1720
+ if ((a.token_id || '') !== (b.token_id || '')) return (a.token_id || '').localeCompare(b.token_id || '');
1721
+ return (a.role || '').localeCompare(b.role || '');
1722
+ });
1723
+
1724
+ evidenceItems.sort((a, b) => {
1725
+ if (a.from_token_id !== b.from_token_id) return a.from_token_id.localeCompare(b.from_token_id);
1726
+ if (a.to_token_id !== b.to_token_id) return a.to_token_id.localeCompare(b.to_token_id);
1727
+ if (a.label !== b.label) return a.label.localeCompare(b.label);
1728
+ return a.annotation_id.localeCompare(b.annotation_id);
1729
+ });
1730
+ const dedupEvidence = [];
1731
+ const seenEvidence = new Set();
1732
+ for (const e of evidenceItems) {
1733
+ const k = JSON.stringify(e);
1734
+ if (seenEvidence.has(k)) continue;
1735
+ seenEvidence.add(k);
1736
+ dedupEvidence.push(e);
1737
+ }
1738
+ if (dedupEvidence.length === 0) continue;
1739
+ const wikiSignals = buildAssertionWikiSignals({
1740
+ predicateMentionId: effectivePredId,
1741
+ relations: rels,
1742
+ mentionById,
1743
+ });
1744
+
1745
+ const predTok = tokenById.get(effectivePredMention.head_token_id);
1746
+ const predPos = predTok && predTok.pos && typeof predTok.pos.tag === 'string' ? predTok.pos.tag : '';
1747
+ const hasOnlyThemeBucket =
1748
+ roleBuckets.actor.length === 0 &&
1749
+ roleBuckets.attr.length === 0 &&
1750
+ roleBuckets.topic.length === 0 &&
1751
+ roleBuckets.location.length === 0 &&
1752
+ roleBuckets.other.length === 0 &&
1753
+ roleBuckets.theme.length > 0;
1754
+ const scaffoldGerundTokenPredicate =
1755
+ effectivePredMention.kind === 'token' &&
1756
+ predPos === 'VBG' &&
1757
+ hasOnlyThemeBucket &&
1758
+ dedupOps.length === 0;
1759
+ if (scaffoldGerundTokenPredicate) {
1760
+ continue;
1761
+ }
1762
+ const nominalCoordScaffoldPredicate =
1763
+ !isVerbPosTag(predPos) &&
1764
+ roleBucketsAreSemanticallyEmpty(roleBuckets) &&
1765
+ dedupOps.length > 0 &&
1766
+ dedupOps.every((op) => op.kind === 'coordination_group');
1767
+ if (nominalCoordScaffoldPredicate) {
1768
+ continue;
1769
+ }
1770
+ const nominalFragmentPredicate =
1771
+ originalPredicateClass === 'nominal_head' &&
1772
+ !isVerbPosTag(predPos) &&
1773
+ dedupOps.length === 0 &&
1774
+ (
1775
+ hasOnlyThemeBucket ||
1776
+ roleBucketsAreSemanticallyEmpty(roleBuckets) ||
1777
+ (
1778
+ roleBuckets.actor.length === 0 &&
1779
+ roleBuckets.theme.length === 0 &&
1780
+ roleBuckets.attr.length === 0 &&
1781
+ roleBuckets.topic.length === 0 &&
1782
+ roleBuckets.location.length === 0 &&
1783
+ roleBuckets.other.length > 0 &&
1784
+ roleBuckets.other.every((entry) => String((entry && entry.role) || '') === 'modifier')
1785
+ )
1786
+ );
1787
+ if (nominalFragmentPredicate) {
1788
+ continue;
1789
+ }
1790
+
1791
+ const rolePayloadForHash = canonicalizeRoleBuckets(roleBuckets, mentionById);
1792
+ const rolesForHash = rolePayloadHashInput(rolePayloadForHash);
1793
+ const opsForHash = JSON.stringify(canonicalizeOperatorsForHash(dedupOps));
1794
+ const id = `a:${effectivePredMention.segment_id}:${effectivePredId}:${sha256Hex(`${rolesForHash}|${opsForHash}`).slice(0, 12)}`;
1795
+ let sourceSuppressedAssertion = null;
1796
+ if (suppressionPointer) {
1797
+ const sourceId = `a:${originalPredMention.segment_id}:${predId}:${sha256Hex(`${rolesForHash}|${opsForHash}`).slice(0, 12)}`;
1798
+ sourceSuppressedAssertion = {
1799
+ id: sourceId,
1800
+ segment_id: originalPredMention.segment_id,
1801
+ predicate: {
1802
+ mention_id: predId,
1803
+ head_token_id: originalPredMention.head_token_id,
1804
+ },
1805
+ diagnostics: {
1806
+ predicate_quality: isLowQualityPredicateToken(originalPredTok) ? 'low' : 'ok',
1807
+ suppressed_by: {
1808
+ kind: 'predicate_redirect',
1809
+ target_assertion_id: id,
1810
+ reason: 'predicate_upgraded_to_lexical',
1811
+ evidence: {
1812
+ upstream_relation_ids: normalizeIds(suppressionPointer.upstream_relation_ids || []),
1813
+ token_ids: normalizeIds([originalPredMention.head_token_id, effectivePredMention.head_token_id]),
1814
+ },
1815
+ },
1816
+ },
1817
+ };
1818
+ }
1819
+
1820
+ assertions.push({
1821
+ id,
1822
+ segment_id: effectivePredMention.segment_id,
1823
+ predicate: {
1824
+ mention_id: effectivePredId,
1825
+ head_token_id: effectivePredMention.head_token_id,
1826
+ },
1827
+ arguments: rolePayloadForHash.arguments,
1828
+ modifiers: rolePayloadForHash.modifiers,
1829
+ operators: dedupOps,
1830
+ evidence: {
1831
+ relation_evidence: dedupEvidence,
1832
+ token_ids: normalizeIds(Array.from(evidenceTokenIds)),
1833
+ wiki_signals: wikiSignals || undefined,
1834
+ },
1835
+ diagnostics: {
1836
+ predicate_quality: isLowQualityPredicateToken(predTok) ? 'low' : 'ok',
1837
+ slot_projection_choice: bucketProjectionChoice || undefined,
1838
+ },
1839
+ });
1840
+ if (sourceSuppressedAssertion) suppressedAssertions.push(sourceSuppressedAssertion);
1841
+
1842
+ coveredMentions.add(effectivePredId);
1843
+ }
1844
+
1845
+ const assertedPredicateMentionIds = new Set(assertions.map((a) => a.predicate.mention_id));
1846
+ const syntheticPredicates = Array.from(mentionById.values())
1847
+ .filter((m) => m && m.is_primary && m.kind === 'token')
1848
+ .filter((m) => !assertedPredicateMentionIds.has(m.id))
1849
+ .sort((a, b) => {
1850
+ if (a.segment_id !== b.segment_id) return a.segment_id.localeCompare(b.segment_id);
1851
+ if (a.span.start !== b.span.start) return a.span.start - b.span.start;
1852
+ return a.id.localeCompare(b.id);
1853
+ });
1854
+
1855
+ for (const predMention of syntheticPredicates) {
1856
+ const predTok = tokenById.get(predMention.head_token_id);
1857
+ if (!predTok) continue;
1858
+ const predTag = String((((predTok || {}).pos) || {}).tag || '');
1859
+ const predSurface = String(predTok.surface || '').toLowerCase();
1860
+ if ((!isVerbPosTag(predTag) && predSurface !== 'complete') || isCopulaSurface(predSurface)) continue;
1861
+ if (predTag === 'VBG') continue;
1862
+ if (isMakeSureScaffoldPredicate({ predTok, projected, tokensBySegment: bySegment })) {
1863
+ coveredMentions.add(predMention.id);
1864
+ continue;
1865
+ }
1866
+
1867
+ const roleBuckets = { actor: [], theme: [], attr: [], topic: [], location: [], other: [] };
1868
+ const themeMention = chooseThemeMentionForPredicateToken(predTok, []);
1869
+ if (themeMention) roleBuckets.theme = [themeMention.id];
1870
+ const locationMention = chooseLocationMentionForPredicateToken(predTok, roleBuckets.theme);
1871
+ if (locationMention) roleBuckets.location = [locationMention.id];
1872
+
1873
+ const hasSupportingSlotEvidence =
1874
+ roleBuckets.theme.some((id) => projectedMentionIds.has(id)) ||
1875
+ roleBuckets.location.some((id) => projectedMentionIds.has(id));
1876
+ const predicateTouchesProjectedGraph = projected.some((r) => r && (r.head_token_id === predTok.id || r.dep_token_id === predTok.id));
1877
+ if (!hasSupportingSlotEvidence && !predicateTouchesProjectedGraph) continue;
1878
+
1879
+ const operatorsByKey = new Map();
1880
+ let coordGroupId = null;
1881
+ for (const themeId of roleBuckets.theme) {
1882
+ const gid = coordGroups.get(themeId);
1883
+ if (gid) {
1884
+ coordGroupId = gid;
1885
+ break;
1886
+ }
1887
+ }
1888
+ if (coordGroupId) {
1889
+ mergeOperator(operatorsByKey, {
1890
+ kind: 'coordination_group',
1891
+ group_id: coordGroupId,
1892
+ evidence: dedupeAndSortEvidence(coordEvidenceByGroup.get(coordGroupId) || []),
1893
+ });
1894
+ }
1895
+ const dedupOps = Array.from(operatorsByKey.values())
1896
+ .map((op) => ({ ...op, evidence: dedupeAndSortEvidence(op.evidence || []) }))
1897
+ .sort((a, b) => {
1898
+ if (a.kind !== b.kind) return a.kind.localeCompare(b.kind);
1899
+ return (a.group_id || '').localeCompare(b.group_id || '');
1900
+ });
1901
+
1902
+ const evidenceTokenIds = new Set(predMention.token_ids || []);
1903
+ for (const mid of roleBuckets.theme.concat(roleBuckets.location)) {
1904
+ const m = mentionById.get(mid);
1905
+ if (!m) continue;
1906
+ for (const tid of m.token_ids || []) evidenceTokenIds.add(tid);
1907
+ coveredMentions.add(mid);
1908
+ }
1909
+
1910
+ const wikiSignals = buildAssertionWikiSignals({
1911
+ predicateMentionId: predMention.id,
1912
+ relations: [],
1913
+ mentionById,
1914
+ });
1915
+ const syntheticTargetMentionId =
1916
+ roleBuckets.theme[0] || roleBuckets.location[0] || roleBuckets.actor[0] || roleBuckets.attr[0] || roleBuckets.topic[0] || null;
1917
+ const syntheticTargetMention = syntheticTargetMentionId ? mentionById.get(syntheticTargetMentionId) : null;
1918
+ const syntheticTargetTokenId =
1919
+ syntheticTargetMention && Array.isArray(syntheticTargetMention.token_ids) && syntheticTargetMention.token_ids.length > 0
1920
+ ? syntheticTargetMention.token_ids[0]
1921
+ : predMention.head_token_id;
1922
+ const syntheticRelationEvidence = [{
1923
+ annotation_id: `synthetic:step12:${predMention.head_token_id}`,
1924
+ from_token_id: predMention.head_token_id,
1925
+ to_token_id: syntheticTargetTokenId,
1926
+ label: 'synthetic_support',
1927
+ }];
1928
+ const rolePayloadForHash = canonicalizeRoleBuckets(roleBuckets, mentionById);
1929
+ const rolesForHash = rolePayloadHashInput(rolePayloadForHash);
1930
+ const opsForHash = JSON.stringify(canonicalizeOperatorsForHash(dedupOps));
1931
+ const id = `a:${predMention.segment_id}:${predMention.id}:${sha256Hex(`${rolesForHash}|${opsForHash}`).slice(0, 12)}`;
1932
+ assertions.push({
1933
+ id,
1934
+ segment_id: predMention.segment_id,
1935
+ predicate: {
1936
+ mention_id: predMention.id,
1937
+ head_token_id: predMention.head_token_id,
1938
+ },
1939
+ arguments: rolePayloadForHash.arguments,
1940
+ modifiers: rolePayloadForHash.modifiers,
1941
+ operators: dedupOps,
1942
+ evidence: {
1943
+ relation_evidence: syntheticRelationEvidence,
1944
+ token_ids: normalizeIds(Array.from(evidenceTokenIds)),
1945
+ wiki_signals: wikiSignals || undefined,
1946
+ },
1947
+ diagnostics: {
1948
+ predicate_quality: isLowQualityPredicateToken(predTok) ? 'low' : 'ok',
1949
+ },
1950
+ });
1951
+ coveredMentions.add(predMention.id);
1952
+ }
1953
+
1954
+ for (const a of assertions) {
1955
+ if (!a || !a.predicate) continue;
1956
+ const predTok = tokenById.get(a.predicate.head_token_id);
1957
+ const predicateClass = classifyPredicateClass(predTok);
1958
+ if (!a.diagnostics || typeof a.diagnostics !== 'object') a.diagnostics = {};
1959
+ a.diagnostics.predicate_class = predicateClass;
1960
+ }
1961
+
1962
+ assertions.sort((a, b) => {
1963
+ if (a.segment_id !== b.segment_id) return a.segment_id.localeCompare(b.segment_id);
1964
+ const pa = tokenById.get(a.predicate.head_token_id);
1965
+ const pb = tokenById.get(b.predicate.head_token_id);
1966
+ if (pa.span.start !== pb.span.start) return pa.span.start - pb.span.start;
1967
+ return a.id.localeCompare(b.id);
1968
+ });
1969
+
1970
+ const merged = mergeModalityCopulaAssertions({
1971
+ assertions,
1972
+ projected,
1973
+ mentionById,
1974
+ tokenById,
1975
+ });
1976
+
1977
+ for (const s of merged.suppressedTraces || []) {
1978
+ if (s && s.predicate && typeof s.predicate.mention_id === 'string') {
1979
+ coveredMentions.delete(s.predicate.mention_id);
1980
+ }
1981
+ }
1982
+
1983
+ const roleCarrierSuppressed = suppressRoleCarrierAssertions({
1984
+ assertions: merged.assertions,
1985
+ tokenById,
1986
+ tokensBySegment: bySegment,
1987
+ mentionById,
1988
+ });
1989
+
1990
+ const finalAssertions = roleCarrierSuppressed.assertions.slice();
1991
+ for (const a of finalAssertions) {
1992
+ dedupeOtherMentionsAgainstCoreBuckets(a, mentionById);
1993
+ }
1994
+ const assertedHeadTokenIds = new Set(
1995
+ finalAssertions
1996
+ .map((a) => String((((a || {}).predicate) || {}).head_token_id || ''))
1997
+ .filter((id) => id.length > 0)
1998
+ );
1999
+ const relationRefsByTokenId = new Map();
2000
+ const coordinationRefsByTokenId = new Map();
2001
+ const structuralOnlyLabels = new Set(['coordination', 'punctuation', 'complement_clause']);
2002
+ for (const rel of projected || []) {
2003
+ if (!rel || typeof rel !== 'object') continue;
2004
+ const label = String(rel.label || '');
2005
+ const headTokenId = String(rel.head_token_id || '');
2006
+ const depTokenId = String(rel.dep_token_id || '');
2007
+ if (headTokenId) {
2008
+ if (!relationRefsByTokenId.has(headTokenId)) relationRefsByTokenId.set(headTokenId, []);
2009
+ relationRefsByTokenId.get(headTokenId).push(rel);
2010
+ if (label === 'coordination') {
2011
+ if (!coordinationRefsByTokenId.has(headTokenId)) coordinationRefsByTokenId.set(headTokenId, []);
2012
+ coordinationRefsByTokenId.get(headTokenId).push(rel);
2013
+ }
2014
+ }
2015
+ if (depTokenId) {
2016
+ if (!relationRefsByTokenId.has(depTokenId)) relationRefsByTokenId.set(depTokenId, []);
2017
+ relationRefsByTokenId.get(depTokenId).push(rel);
2018
+ if (label === 'coordination') {
2019
+ if (!coordinationRefsByTokenId.has(depTokenId)) coordinationRefsByTokenId.set(depTokenId, []);
2020
+ coordinationRefsByTokenId.get(depTokenId).push(rel);
2021
+ }
2022
+ }
2023
+ }
2024
+
2025
+ const tokenCandidatesForVerbAnchors = Array.from(relationRefsByTokenId.keys())
2026
+ .map((id) => tokenById.get(id))
2027
+ .filter(Boolean)
2028
+ .filter((tok) => {
2029
+ const tag = String((((tok || {}).pos) || {}).tag || '');
2030
+ const coarse = String((((tok || {}).pos) || {}).coarse || '').toUpperCase();
2031
+ return isLexicalVerbPos(tag) || coarse === 'VERB';
2032
+ })
2033
+ .sort((a, b) => {
2034
+ if (a.segment_id !== b.segment_id) return a.segment_id.localeCompare(b.segment_id);
2035
+ if (a.i !== b.i) return a.i - b.i;
2036
+ return String(a.id || '').localeCompare(String(b.id || ''));
2037
+ });
2038
+
2039
+ for (const tok of tokenCandidatesForVerbAnchors) {
2040
+ const tokenId = String(tok.id || '');
2041
+ if (!tokenId || assertedHeadTokenIds.has(tokenId)) continue;
2042
+ const refs = relationRefsByTokenId.get(tokenId) || [];
2043
+ const semanticRefs = refs.filter((r) => !structuralOnlyLabels.has(String(r.label || '')));
2044
+ if (semanticRefs.length === 0) continue;
2045
+ const coordRefs = coordinationRefsByTokenId.get(tokenId) || [];
2046
+ if (coordRefs.length === 0) continue;
2047
+ const coordPeerHasAssertion = coordRefs.some((r) => {
2048
+ const peerTokenId = r.head_token_id === tokenId ? r.dep_token_id : r.head_token_id;
2049
+ return assertedHeadTokenIds.has(String(peerTokenId || ''));
2050
+ });
2051
+ if (!coordPeerHasAssertion) continue;
2052
+
2053
+ const predPick = chooseBestMentionForToken({
2054
+ tokenId,
2055
+ segmentId: tok.segment_id,
2056
+ mentionById,
2057
+ candidateMentionIds: tokenMentionIds.get(tokenId) || [],
2058
+ excludeMentionId: null,
2059
+ preferMinimalSpan: false,
2060
+ });
2061
+ const predMentionId = predPick && typeof predPick.mention_id === 'string' ? predPick.mention_id : null;
2062
+ const predMention = predMentionId ? mentionById.get(predMentionId) : null;
2063
+ if (!predMention) continue;
2064
+ const predTokenIdSet = new Set(predMention.token_ids || []);
2065
+ if (finalAssertions.some((a) => String((((a || {}).predicate) || {}).mention_id || '') === predMentionId)) {
2066
+ assertedHeadTokenIds.add(tokenId);
2067
+ continue;
2068
+ }
2069
+
2070
+ const roleBuckets = {
2071
+ actor: [],
2072
+ theme: [],
2073
+ attr: [],
2074
+ topic: [],
2075
+ location: [],
2076
+ other: [],
2077
+ };
2078
+ const otherRoleMap = new Map();
2079
+ const operatorsByKey = new Map();
2080
+ const evidenceTokenIds = new Set([tokenId]);
2081
+ const evidenceItems = [];
2082
+
2083
+ for (const r of semanticRefs) {
2084
+ const headTokenId = String(r.head_token_id || '');
2085
+ const depTokenId = String(r.dep_token_id || '');
2086
+ const evidenceItem = {
2087
+ annotation_id: r.relation_id || 'r:unknown',
2088
+ from_token_id: headTokenId,
2089
+ to_token_id: depTokenId,
2090
+ label: String(r.label || ''),
2091
+ };
2092
+ evidenceItems.push(evidenceItem);
2093
+ if (headTokenId) evidenceTokenIds.add(headTokenId);
2094
+ if (depTokenId) evidenceTokenIds.add(depTokenId);
2095
+
2096
+ if (headTokenId !== tokenId) {
2097
+ continue;
2098
+ }
2099
+
2100
+ const depMentionPick = chooseBestMentionForToken({
2101
+ tokenId: depTokenId,
2102
+ segmentId: tok.segment_id,
2103
+ mentionById,
2104
+ candidateMentionIds: tokenMentionIds.get(depTokenId) || [],
2105
+ excludeMentionId: predMentionId,
2106
+ preferMinimalSpan: true,
2107
+ });
2108
+ const depMentionIdFromPick = depMentionPick && typeof depMentionPick.mention_id === 'string'
2109
+ ? depMentionPick.mention_id
2110
+ : null;
2111
+ const depMentionIdFromRelation = typeof r.dep_mention_id === 'string' ? r.dep_mention_id : null;
2112
+ let depMentionIdForSlot = depMentionIdFromPick || depMentionIdFromRelation;
2113
+ if (mentionOverlapsTokenSet(depMentionIdForSlot, predTokenIdSet)) {
2114
+ depMentionIdForSlot = null;
2115
+ }
2116
+ if (!depMentionIdForSlot || depMentionIdForSlot === predMentionId) continue;
2117
+ const map = roleToSlot(r.label);
2118
+ if (map.slot === 'other') {
2119
+ if (!otherRoleMap.has(map.role || r.label)) otherRoleMap.set(map.role || r.label, new Set());
2120
+ otherRoleMap.get(map.role || r.label).add(depMentionIdForSlot);
2121
+ } else {
2122
+ roleBuckets[map.slot].push(depMentionIdForSlot);
2123
+ }
2124
+ coveredMentions.add(depMentionIdForSlot);
2125
+ }
2126
+
2127
+ for (const r of coordRefs) {
2128
+ const evidenceItem = {
2129
+ annotation_id: r.relation_id || 'r:unknown',
2130
+ from_token_id: r.head_token_id,
2131
+ to_token_id: r.dep_token_id,
2132
+ label: r.label,
2133
+ };
2134
+ mergeOperator(operatorsByKey, {
2135
+ kind: 'coordination_group',
2136
+ group_id: coordGroups.get(predMentionId) || `cg:${sha256Hex(`${predMentionId}|${r.relation_id || evidenceItem.from_token_id || ''}`).slice(0, 12)}`,
2137
+ value: undefined,
2138
+ evidence: [evidenceItem],
2139
+ });
2140
+ }
2141
+
2142
+ roleBuckets.actor = normalizeIds(roleBuckets.actor);
2143
+ roleBuckets.theme = normalizeIds(roleBuckets.theme);
2144
+ roleBuckets.attr = normalizeIds(roleBuckets.attr);
2145
+ roleBuckets.topic = normalizeIds(roleBuckets.topic);
2146
+ roleBuckets.location = normalizeIds(roleBuckets.location);
2147
+ roleBuckets.other = Array.from(otherRoleMap.entries())
2148
+ .sort((a, b) => a[0].localeCompare(b[0]))
2149
+ .map(([role, ids]) => ({ role, mention_ids: normalizeIds(Array.from(ids)) }));
2150
+ const workingAssertion = {
2151
+ predicate: { mention_id: predMentionId },
2152
+ };
2153
+ applyRoleBucketsToAssertion(workingAssertion, {
2154
+ actor: roleBuckets.actor.slice(),
2155
+ theme: roleBuckets.theme.slice(),
2156
+ attr: roleBuckets.attr.slice(),
2157
+ topic: roleBuckets.topic.slice(),
2158
+ location: roleBuckets.location.slice(),
2159
+ other: roleBuckets.other.map((o) => ({ role: o.role, mention_ids: (o.mention_ids || []).slice() })),
2160
+ }, mentionById);
2161
+ enforceCoreBucketTokenDisjointness(workingAssertion, mentionById, tokenById);
2162
+ trimCatchAllThemeBuckets(workingAssertion, mentionById, tokenById, bySegment);
2163
+ const workingSlots = assertionRoleBuckets(workingAssertion);
2164
+ roleBuckets.actor = normalizeIds(workingSlots.actor || []);
2165
+ roleBuckets.theme = normalizeIds(workingSlots.theme || []);
2166
+ roleBuckets.attr = normalizeIds(workingSlots.attr || []);
2167
+ roleBuckets.topic = normalizeIds(workingSlots.topic || []);
2168
+ roleBuckets.location = normalizeIds(workingSlots.location || []);
2169
+ roleBuckets.other = Array.isArray(workingSlots.other)
2170
+ ? workingSlots.other.map((o) => ({ role: o.role, mention_ids: normalizeIds(o.mention_ids || []) }))
2171
+ : [];
2172
+ const dedupOps = Array.from(operatorsByKey.values())
2173
+ .map((op) => ({ ...op, evidence: dedupeAndSortEvidence(op.evidence || []) }))
2174
+ .sort((a, b) => {
2175
+ if ((a.kind || '') !== (b.kind || '')) return (a.kind || '').localeCompare(b.kind || '');
2176
+ if ((a.value || '') !== (b.value || '')) return (a.value || '').localeCompare(b.value || '');
2177
+ if ((a.group_id || '') !== (b.group_id || '')) return (a.group_id || '').localeCompare(b.group_id || '');
2178
+ if ((a.token_id || '') !== (b.token_id || '')) return (a.token_id || '').localeCompare(b.token_id || '');
2179
+ return (a.role || '').localeCompare(b.role || '');
2180
+ });
2181
+
2182
+ evidenceItems.sort((a, b) => {
2183
+ if (a.from_token_id !== b.from_token_id) return a.from_token_id.localeCompare(b.from_token_id);
2184
+ if (a.to_token_id !== b.to_token_id) return a.to_token_id.localeCompare(b.to_token_id);
2185
+ if (a.label !== b.label) return a.label.localeCompare(b.label);
2186
+ return a.annotation_id.localeCompare(b.annotation_id);
2187
+ });
2188
+ const seenEvidence = new Set();
2189
+ const dedupEvidence = [];
2190
+ for (const e of evidenceItems) {
2191
+ const k = JSON.stringify(e);
2192
+ if (seenEvidence.has(k)) continue;
2193
+ seenEvidence.add(k);
2194
+ dedupEvidence.push(e);
2195
+ }
2196
+ if (dedupEvidence.length === 0) continue;
2197
+
2198
+ const rolePayloadForHash = canonicalizeRoleBuckets(roleBuckets, mentionById);
2199
+ const rolesForHash = rolePayloadHashInput(rolePayloadForHash);
2200
+ const opsForHash = JSON.stringify(canonicalizeOperatorsForHash(dedupOps));
2201
+ const id = `a:${predMention.segment_id}:${predMention.id}:${sha256Hex(`${rolesForHash}|${opsForHash}`).slice(0, 12)}`;
2202
+ finalAssertions.push({
2203
+ id,
2204
+ segment_id: predMention.segment_id,
2205
+ predicate: {
2206
+ mention_id: predMention.id,
2207
+ head_token_id: predMention.head_token_id,
2208
+ },
2209
+ arguments: rolePayloadForHash.arguments,
2210
+ modifiers: rolePayloadForHash.modifiers,
2211
+ operators: dedupOps,
2212
+ evidence: {
2213
+ relation_evidence: dedupEvidence,
2214
+ token_ids: normalizeIds(Array.from(evidenceTokenIds)),
2215
+ },
2216
+ diagnostics: {
2217
+ predicate_quality: isLowQualityPredicateToken(tok) ? 'low' : 'ok',
2218
+ },
2219
+ });
2220
+ assertedHeadTokenIds.add(tokenId);
2221
+ coveredMentions.add(predMention.id);
2222
+ }
2223
+
2224
+ const lexicalPredicateBySegment = new Set();
2225
+ for (const a of finalAssertions) {
2226
+ if (!a || !a.predicate) continue;
2227
+ const predTok = tokenById.get(a.predicate.head_token_id);
2228
+ const predicateClass = classifyPredicateClass(predTok);
2229
+ if (!a.diagnostics || typeof a.diagnostics !== 'object') a.diagnostics = {};
2230
+ a.diagnostics.predicate_class = predicateClass;
2231
+ if (predicateClass === 'lexical_verb') lexicalPredicateBySegment.add(a.segment_id);
2232
+ }
2233
+ for (const a of finalAssertions) {
2234
+ if (!a || !a.predicate) continue;
2235
+ const predicateClass = String((((a || {}).diagnostics) || {}).predicate_class || '');
2236
+ a.diagnostics.structural_fragment =
2237
+ (predicateClass === 'preposition' || predicateClass === 'nominal_head') &&
2238
+ lexicalPredicateBySegment.has(a.segment_id);
2239
+ }
2240
+ const clauseKeyByAssertionId = new Map();
2241
+ for (const a of finalAssertions) {
2242
+ clauseKeyByAssertionId.set(a.id, assertionClauseWindowKey(a, tokenById, bySegment));
2243
+ }
2244
+ for (const a of finalAssertions) {
2245
+ if (!a || !a.diagnostics || a.diagnostics.structural_fragment !== true) continue;
2246
+ const cls = String(a.diagnostics.predicate_class || '');
2247
+ if (!(cls === 'preposition' || cls === 'nominal_head' || cls === 'auxiliary')) continue;
2248
+ const trace = buildSuppressionEligibilityTrace({
2249
+ source: a,
2250
+ assertions: finalAssertions,
2251
+ tokenById,
2252
+ clauseKeyByAssertionId,
2253
+ });
2254
+ if (trace) {
2255
+ a.diagnostics.suppression_eligibility = trace;
2256
+ }
2257
+ }
2258
+ finalAssertions.sort((a, b) => {
2259
+ if (a.segment_id !== b.segment_id) return a.segment_id.localeCompare(b.segment_id);
2260
+ const pa = tokenById.get(a.predicate.head_token_id);
2261
+ const pb = tokenById.get(b.predicate.head_token_id);
2262
+ if (pa.span.start !== pb.span.start) return pa.span.start - pb.span.start;
2263
+ return a.id.localeCompare(b.id);
2264
+ });
2265
+
2266
+ const combinedSuppressed = suppressedAssertions
2267
+ .concat(merged.suppressedTraces || [])
2268
+ .concat(roleCarrierSuppressed.suppressedTraces || []);
2269
+ combinedSuppressed.sort((a, b) => a.id.localeCompare(b.id));
2270
+
2271
+ return { assertions: finalAssertions, coveredMentions, suppressedAssertions: combinedSuppressed };
2272
+ }
2273
+
2274
+
2275
+ module.exports = { // CommonJS export surface: each identifier below is exported under its own name via shorthand properties.
2276
+ isVerbPosTag,
2277
+ isLexicalVerbPos, // called with a token's POS tag when filtering verb-anchor candidate tokens
2278
+ classifyPredicateClass, // its result is stored as diagnostics.predicate_class on each final assertion
2279
+ isNounLikePosTag,
2280
+ isCopulaSurface,
2281
+ lower,
2282
+ isLowQualityPredicateToken, // selects 'low' vs 'ok' for diagnostics.predicate_quality
2283
+ roleBucketsAreSemanticallyEmpty,
2284
+ isClauseBoundaryToken,
2285
+ assertionClauseWindowKey, // called as (assertion, tokenById, bySegment); result is kept per assertion id for suppression tracing
2286
+ assertionHasBlockingOperators,
2287
+ collectAssertionMentionRefs,
2288
+ addToOtherSlot,
2289
+ transferRoleCarrierBucketsToHost,
2290
+ collectRoleBucketMentionIds,
2291
+ transferNamedBucketsToHostOther,
2292
+ transferOperatorsToHost,
2293
+ dedupeOtherMentionsAgainstCoreBuckets, // called as (assertion, mentionById) on every final assertion
2294
+ enforceCoreBucketTokenDisjointness, // called as (assertion, mentionById, tokenById) on a working assertion
2295
+ pruneLowCopulaBuckets,
2296
+ buildSuppressionEligibilityTrace, // a truthy result is stored as diagnostics.suppression_eligibility
2297
+ suppressRoleCarrierAssertions, // callers read .assertions and .suppressedTraces from its result
2298
+ choosePredicateUpgradeCandidate,
2299
+ mergeModalityCopulaAssertions, // callers read .assertions and .suppressedTraces from its result
2300
+ buildAssertions, // NOTE(review): appears to be the function ending just above, which returns { assertions, coveredMentions, suppressedAssertions } - confirm
2301
+ };
2302
+
2303
+
2304
+