elementary-assertions 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +353 -0
- package/LICENSE +21 -0
- package/README.md +211 -0
- package/bin/elementary-assertions.js +8 -0
- package/docs/DEV_TOOLING.md +98 -0
- package/docs/NPM_RELEASE.md +177 -0
- package/docs/OPERATIONAL.md +159 -0
- package/docs/RELEASE_NOTES_TEMPLATE.md +37 -0
- package/docs/REPO_WORKFLOWS.md +48 -0
- package/package.json +46 -0
- package/src/core/accepted-annotations.js +44 -0
- package/src/core/assertions.js +2304 -0
- package/src/core/determinism.js +95 -0
- package/src/core/diagnostics.js +496 -0
- package/src/core/ids.js +9 -0
- package/src/core/mention-builder.js +272 -0
- package/src/core/mention-evidence.js +52 -0
- package/src/core/mention-head-resolution.js +108 -0
- package/src/core/mention-materialization.js +31 -0
- package/src/core/mentions.js +149 -0
- package/src/core/output.js +296 -0
- package/src/core/projection.js +192 -0
- package/src/core/roles.js +164 -0
- package/src/core/strings.js +7 -0
- package/src/core/tokens.js +53 -0
- package/src/core/upstream.js +31 -0
- package/src/index.js +6 -0
- package/src/render/index.js +5 -0
- package/src/render/layouts/compact.js +10 -0
- package/src/render/layouts/meaning.js +7 -0
- package/src/render/layouts/readable.js +7 -0
- package/src/render/layouts/table.js +7 -0
- package/src/render/render.js +931 -0
- package/src/run.js +278 -0
- package/src/schema/seed.elementary-assertions.schema.json +1751 -0
- package/src/tools/cli.js +158 -0
- package/src/tools/index.js +6 -0
- package/src/tools/io.js +55 -0
- package/src/validate/ajv.js +20 -0
- package/src/validate/coverage.js +215 -0
- package/src/validate/determinism.js +115 -0
- package/src/validate/diagnostics-strict.js +392 -0
- package/src/validate/errors.js +19 -0
- package/src/validate/index.js +20 -0
- package/src/validate/integrity.js +41 -0
- package/src/validate/invariants.js +157 -0
- package/src/validate/references.js +110 -0
- package/src/validate/schema.js +50 -0
package/src/tools/cli.js
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
const { runElementaryAssertions, runFromRelations } = require("../run");
|
|
2
|
+
const { validateElementaryAssertions } = require("../validate");
|
|
3
|
+
const { renderElementaryAssertions } = require("../render");
|
|
4
|
+
const { arg, normalizeOptionalString, parseStrictBoolean, readUtf8, readUtf8WithFileSource, writeUtf8 } = require("./io");
|
|
5
|
+
|
|
6
|
+
/**
 * Require the `js-yaml` module on demand.
 *
 * The require call lives inside this function, so the dependency is only
 * resolved when a command actually needs to parse or emit YAML.
 *
 * @returns {object} Whatever `require("js-yaml")` returns.
 * @throws {Error} When the module cannot be required. The underlying failure
 *   is attached as `cause` so the real reason (missing package, broken
 *   install, ...) is not lost.
 */
function loadYaml() {
  try {
    return require("js-yaml");
  } catch (err) {
    throw new Error("Unable to load js-yaml. Install dependencies before using CLI file commands.", { cause: err });
  }
}
|
|
13
|
+
|
|
14
|
+
/**
 * Build the multi-line usage text printed by the CLI help path.
 *
 * @returns {string} A "Usage:" heading followed by one line per command,
 *   joined with "\n" (no trailing newline).
 */
function usage() {
  return [
    "Usage:",
    // The closing ">" of the last placeholder was missing in the original text.
    "  elementary-assertions run --text <string> | --in <path> | --relations <path> [--out <path>] [--timeout-ms <ms>] [--wti-endpoint <url>] [--wti-timeout-ms <ms>]",
    "  elementary-assertions validate --in <path>",
    "  elementary-assertions render --in <path> [--out <path>] --format <txt|md> --layout <compact|readable|table|meaning>",
  ].join("\n");
}
|
|
22
|
+
|
|
23
|
+
// Diagnostic switches that the public CLI always rejects.
const DEV_DIAGNOSTIC_FLAGS = [
  "--diagnose-wiki-upstream",
  "--diagnose-wti-wiring",
  "--diagnose-coverage-audit",
];

/**
 * Reject any developer diagnostic flag present in the argument list.
 *
 * Nothing happens when no diagnostic flag is used. Otherwise an error is
 * always thrown; only the wording depends on whether `--dev` was also passed.
 *
 * @param {string[]} args - Command arguments (without the command name).
 * @returns {void}
 * @throws {Error} When at least one entry of DEV_DIAGNOSTIC_FLAGS is present.
 */
function enforceDevFlagPolicy(args) {
  const devRequested = args.includes("--dev");
  const offending = [];
  for (const flag of DEV_DIAGNOSTIC_FLAGS) {
    if (args.includes(flag)) offending.push(flag);
  }
  if (offending.length === 0) {
    return;
  }

  const listed = offending.join(", ");
  if (devRequested) {
    throw new Error(`Diagnostic flags are developer-only and not available in the public CLI: ${listed}`);
  }
  throw new Error(`Diagnostic flags require --dev: ${listed}`);
}
|
|
38
|
+
|
|
39
|
+
/**
 * Read the three mutually exclusive input selectors of the `run` command.
 *
 * A selector counts as provided when `arg` yields a string for it.
 *
 * @param {string[]} args - Command arguments.
 * @returns {{text: *, inPath: *, relationsPath: *}} The raw values returned by
 *   `arg` for `--text`, `--in` and `--relations`.
 * @throws {Error} When none or more than one selector is provided.
 */
function parseRunInput(args) {
  const text = arg(args, "--text");
  const inPath = arg(args, "--in");
  const relationsPath = arg(args, "--relations");

  const provided = [text, inPath, relationsPath].filter((value) => typeof value === "string").length;
  if (provided > 1) {
    throw new Error("Exactly one of --text, --in, or --relations is required; multiple provided.");
  }
  if (provided < 1) {
    throw new Error("Exactly one of --text, --in, or --relations is required; none provided.");
  }
  return { text, inPath, relationsPath };
}
|
|
51
|
+
|
|
52
|
+
/**
 * Execute the `run` command: build a document from one of the three input
 * kinds and emit it as YAML.
 *
 * - `--relations <path>`: the file is parsed as YAML and handed to
 *   `runFromRelations` (called without `await`, and without the timeout
 *   options).
 * - `--text <string>` / `--in <path>`: the text is passed to
 *   `runElementaryAssertions`; for `--in` the file's source descriptor is
 *   forwarded as `sourceInputs`.
 *
 * The YAML goes to `--out` when given, otherwise to stdout.
 *
 * @param {string[]} args - Command arguments.
 * @returns {Promise<void>}
 */
async function runCommand(args) {
  const input = parseRunInput(args);
  const outPath = arg(args, "--out");
  // A missing, zero or non-numeric value collapses to `undefined`.
  const timeoutMs = Number(arg(args, "--timeout-ms") || 0) || undefined;
  const wtiTimeoutMs = Number(arg(args, "--wti-timeout-ms") || 0) || undefined;
  const endpoint = normalizeOptionalString(
    arg(args, "--wti-endpoint") || process.env.WIKIPEDIA_TITLE_INDEX_ENDPOINT || ""
  );

  let doc;
  if (typeof input.relationsPath === "string") {
    const relationsYaml = loadYaml();
    const relationsFile = readUtf8WithFileSource(input.relationsPath, "relations input file", "seed.relations.yaml");
    const relationsDoc = relationsYaml.load(relationsFile.text);
    doc = runFromRelations(relationsDoc, {
      wtiEndpoint: endpoint,
      sourceInputs: [relationsFile.sourceInput],
      suppressDefaultRelationsSource: true,
    });
  } else {
    const pipelineOptions = {
      services: { "wikipedia-title-index": { endpoint } },
      timeoutMs,
      wtiTimeoutMs,
    };
    let sourceText = input.text;
    if (typeof sourceText !== "string") {
      // No inline text: fall back to the --in file and record where it came from.
      const seedFile = readUtf8WithFileSource(input.inPath, "input file", "seed.txt");
      sourceText = seedFile.text;
      pipelineOptions.sourceInputs = [seedFile.sourceInput];
    }
    doc = await runElementaryAssertions(sourceText, pipelineOptions);
  }

  const serialized = loadYaml().dump(doc, { lineWidth: -1 });
  if (outPath) {
    writeUtf8(outPath, serialized);
  } else {
    process.stdout.write(serialized);
  }
}
|
|
90
|
+
|
|
91
|
+
/**
 * Execute the `validate` command: parse the YAML file given by `--in`, run
 * `validateElementaryAssertions` on it and print "ok" when that call returns.
 *
 * @param {string[]} args - Command arguments.
 * @returns {void}
 * @throws {Error} When `--in` is missing; errors from reading, parsing or
 *   validating propagate unchanged.
 */
function validateCommand(args) {
  const inPath = arg(args, "--in");
  if (!inPath) {
    throw new Error("validate requires --in <path>");
  }

  const yaml = loadYaml();
  const fileText = readUtf8(inPath, "input file");
  validateElementaryAssertions(yaml.load(fileText));
  process.stdout.write("ok\n");
}
|
|
100
|
+
|
|
101
|
+
/**
 * Execute the `render` command: parse the YAML file given by `--in`, render it
 * with `renderElementaryAssertions` and write the result to `--out` or stdout.
 *
 * `--format` defaults to "txt" and `--layout` to "compact". The boolean
 * switches accept only true|false (via `parseStrictBoolean`) and default to
 * the values listed below when absent.
 *
 * @param {string[]} args - Command arguments.
 * @returns {void}
 * @throws {Error} When `--in` is missing or a boolean switch has an invalid value.
 */
function renderCommand(args) {
  const inPath = arg(args, "--in");
  if (!inPath) {
    throw new Error("render requires --in <path>");
  }
  const yaml = loadYaml();

  const outPath = arg(args, "--out");
  const format = arg(args, "--format") || "txt";
  const layout = arg(args, "--layout") || "compact";

  const doc = yaml.load(readUtf8(inPath, "input file"));

  // Parse one strict boolean switch, falling back to its default literal.
  const toggle = (flag, fallback) => parseStrictBoolean(arg(args, flag) || fallback, flag);

  const rendered = renderElementaryAssertions(doc, {
    format,
    layout,
    segments: toggle("--segments", "true"),
    mentions: toggle("--mentions", "true"),
    coverage: toggle("--coverage", "true"),
    debugIds: toggle("--debug-ids", "false"),
    normalizeDeterminers: toggle("--normalize-determiners", "true"),
    renderUncoveredDelta: toggle("--render-uncovered-delta", "false"),
  });

  if (outPath) {
    writeUtf8(outPath, rendered);
  } else {
    process.stdout.write(rendered);
  }
}
|
|
128
|
+
|
|
129
|
+
/**
 * CLI entry point: dispatch the first argument to its command handler.
 *
 * With no command, `--help` or `-h`, the usage text is printed. Every known
 * command first passes its arguments through `enforceDevFlagPolicy`.
 *
 * @param {string[]} [argv=process.argv.slice(2)] - Command followed by its arguments.
 * @returns {Promise<void>}
 * @throws {Error} For an unrecognised command.
 */
async function runCli(argv = process.argv.slice(2)) {
  const [cmd, ...args] = argv;

  const wantsHelp = !cmd || cmd === "--help" || cmd === "-h";
  if (wantsHelp) {
    process.stdout.write(`${usage()}\n`);
    return;
  }

  switch (cmd) {
    case "run":
      enforceDevFlagPolicy(args);
      await runCommand(args);
      return;
    case "validate":
      enforceDevFlagPolicy(args);
      validateCommand(args);
      return;
    case "render":
      enforceDevFlagPolicy(args);
      renderCommand(args);
      return;
    default:
      throw new Error(`Unknown command: ${cmd}`);
  }
}
|
|
154
|
+
|
|
155
|
+
// Public surface of the CLI module: the entry point and the usage text builder.
module.exports = { runCli, usage };
|
package/src/tools/io.js
ADDED
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
const fs = require("node:fs");
|
|
2
|
+
const path = require("node:path");
|
|
3
|
+
const { normalizeOptionalString } = require("../core/strings");
|
|
4
|
+
const { sha256Hex } = require("../core/determinism");
|
|
5
|
+
|
|
6
|
+
/**
 * Parse a value that must spell exactly "true" or "false".
 *
 * The value is stringified, trimmed and lower-cased before comparison. Only
 * null/undefined are treated as empty; a real boolean `false` is stringified
 * to "false" instead of being collapsed to the empty string and rejected.
 *
 * @param {*} raw - Raw option value (usually a string from the command line).
 * @param {string} name - Option name used in the error message.
 * @returns {boolean} The parsed boolean.
 * @throws {Error} When the normalized value is neither "true" nor "false".
 */
function parseStrictBoolean(raw, name) {
  const v = String(raw == null ? "" : raw).trim().toLowerCase();
  if (v === "true") return true;
  if (v === "false") return false;
  throw new Error(`Invalid value for ${name}: expected true|false.`);
}
|
|
12
|
+
|
|
13
|
+
/**
 * Synchronously read a file as UTF-8 text.
 *
 * @param {string} filePath - Path passed to `fs.readFileSync`.
 * @param {string} label - Human-readable description of the file, used in the
 *   error message.
 * @returns {string} The file content.
 * @throws {Error} "Error reading <label>: <reason>" when the read fails; the
 *   original error is attached as `cause` so its code and stack stay
 *   available.
 */
function readUtf8(filePath, label) {
  try {
    return fs.readFileSync(filePath, "utf8");
  } catch (err) {
    const detail = err && err.message ? err.message : String(err);
    throw new Error(`Error reading ${label}: ${detail}`, { cause: err });
  }
}
|
|
20
|
+
|
|
21
|
+
/**
 * Read a UTF-8 file and describe where its content came from.
 *
 * @param {string} filePath - File to read.
 * @param {string} label - Description forwarded to `readUtf8` for error messages.
 * @param {string} artifact - Artifact name stored verbatim in the descriptor.
 * @returns {{text: string, sourceInput: object}} The file text plus a
 *   descriptor holding the artifact name, `sha256Hex(text)` as digest, and an
 *   origin with kind "file", the resolved absolute path and the file's
 *   modification time truncated to whole milliseconds (never negative).
 */
function readUtf8WithFileSource(filePath, label, artifact) {
  const text = readUtf8(filePath, label);
  const { mtimeMs } = fs.statSync(filePath);

  const digest = sha256Hex(text);
  const origin = {
    kind: "file",
    path: path.resolve(filePath),
    mtime_ms: Math.max(0, Math.trunc(mtimeMs)),
  };

  return { text, sourceInput: { artifact, digest, origin } };
}
|
|
37
|
+
|
|
38
|
+
/**
 * Synchronously write text to a file using UTF-8 encoding.
 *
 * @param {string} filePath - Destination path.
 * @param {string} text - Content to write.
 * @returns {void}
 */
function writeUtf8(filePath, text) {
  const encoding = "utf8";
  fs.writeFileSync(filePath, text, encoding);
}
|
|
41
|
+
|
|
42
|
+
/**
 * Look up the value that follows a named option in an argument list.
 *
 * Only the first occurrence of `name` is considered, and the following
 * element is returned as-is (even if it looks like another option).
 *
 * @param {string[]} args - Argument list.
 * @param {string} name - Option name, e.g. "--in".
 * @returns {string|null} The element after `name`, or null when `name` is
 *   absent or is the last element.
 */
function arg(args, name) {
  const flagIndex = args.indexOf(name);
  const valueIndex = flagIndex + 1;
  const hasValue = flagIndex >= 0 && valueIndex < args.length;
  return hasValue ? args[valueIndex] : null;
}
|
|
47
|
+
|
|
48
|
+
// Helpers shared by the CLI; `normalizeOptionalString` is re-exported from ../core/strings.
module.exports = { normalizeOptionalString, parseStrictBoolean, readUtf8, readUtf8WithFileSource, writeUtf8, arg };
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
const Ajv2020 = require("ajv/dist/2020");
|
|
2
|
+
const addFormats = require("ajv-formats");
|
|
3
|
+
const schema = require("../schema/seed.elementary-assertions.schema.json");
|
|
4
|
+
|
|
5
|
+
// Compiled validator, created on first use and reused afterwards.
let cachedValidate = null;

/**
 * Return the compiled schema validator, compiling it on the first call.
 *
 * The Ajv instance is created with `allErrors: true` and `strict: false`, and
 * `addFormats` is applied before compiling the bundled schema.
 *
 * @returns {Function} The function returned by `ajv.compile(schema)`; the same
 *   instance on every call.
 */
function getSchemaValidator() {
  if (!cachedValidate) {
    const ajv = new Ajv2020({ allErrors: true, strict: false });
    addFormats(ajv);
    cachedValidate = ajv.compile(schema);
  }
  return cachedValidate;
}
|
|
17
|
+
|
|
18
|
+
// Only the lazy validator accessor is exposed.
module.exports = { getSchemaValidator };
|
|
@@ -0,0 +1,215 @@
|
|
|
1
|
+
const { ensureSortedStrings } = require("./determinism");
|
|
2
|
+
const { failValidation } = require("./errors");
|
|
3
|
+
|
|
4
|
+
/**
 * Tell whether a part-of-speech tag is one of the tags this module treats as
 * "content" tags.
 *
 * Matching is exact and case-sensitive.
 *
 * @param {*} tag - Candidate tag.
 * @returns {boolean} True only for a non-empty string found in the list below.
 */
function isContentPosTag(tag) {
  const contentTags = new Set([
    "NN", "NNS", "NNP", "NNPS",
    "VB", "VBD", "VBG", "VBN", "VBP", "VBZ",
    "JJ", "JJR", "JJS",
    "RB", "RBR", "RBS",
    "CD", "PRP", "PRP$", "FW", "UH",
  ]);
  const isNonEmptyString = typeof tag === "string" && tag.length > 0;
  return isNonEmptyString && contentTags.has(tag);
}
|
|
8
|
+
|
|
9
|
+
/**
 * Tell whether a surface string consists solely of Unicode punctuation
 * (\p{P}) and symbol (\p{S}) characters.
 *
 * @param {*} surface - Candidate surface text.
 * @returns {boolean} True for a non-empty string whose every code point is
 *   punctuation or a symbol; false otherwise (including non-strings).
 */
function isPunctuationSurface(surface) {
  if (typeof surface !== "string") return false;
  if (surface.length === 0) return false;

  const punctuationOrSymbol = /^[\p{P}\p{S}]$/u;
  // Spreading a string iterates by code point, matching the `u`-flag semantics.
  return [...surface].every((codePoint) => punctuationOrSymbol.test(codePoint));
}
|
|
13
|
+
|
|
14
|
+
/**
 * Derive the set of mention ids that the document's own mentions and tokens
 * imply should be listed as primary in the coverage section.
 *
 * A mention is included when it is flagged `is_primary === true`, has a
 * non-empty string id, and its head token exists, carries a content POS tag
 * (see `isContentPosTag`) and is not a pure punctuation/symbol surface (see
 * `isPunctuationSurface`).
 *
 * @param {object} doc - Document with optional `tokens` and `mentions` arrays.
 * @returns {Set<string>} Ids of the qualifying mentions, in document order.
 */
function buildExpectedCoveragePrimarySet(doc) {
  // Skip null/undefined token entries instead of crashing on `token.id`,
  // in line with the null-tolerant token lookup used by validateCoverage.
  const tokenById = new Map();
  for (const token of doc.tokens || []) {
    if (token) tokenById.set(token.id, token);
  }

  const expected = new Set();
  for (const mention of doc.mentions || []) {
    if (!mention || mention.is_primary !== true || typeof mention.id !== "string" || mention.id.length === 0) continue;
    const headToken = tokenById.get(mention.head_token_id);
    if (!headToken) continue;
    const tag = headToken.pos && typeof headToken.pos.tag === "string" ? headToken.pos.tag : "";
    if (!isContentPosTag(tag)) continue;
    if (isPunctuationSurface(headToken.surface)) continue;
    expected.add(mention.id);
  }
  return expected;
}
|
|
28
|
+
|
|
29
|
+
// Report an unresolved entry whose segment disagrees with a referenced mention or token.
function failUnresolvedSegmentMismatch() {
  failValidation(
    "EA_VALIDATE_STRICT_UNRESOLVED_SEGMENT_MISMATCH",
    "Strict validation error: coverage.unresolved[*].segment_id must match referenced mention/token segments."
  );
}

// Report a derived-vs-declared primary mention set mismatch.
function failStrictPrimarySetMismatch() {
  failValidation(
    "EA_VALIDATE_STRICT_COVERAGE_PRIMARY_SET",
    "Strict validation error: coverage.primary_mention_ids must equal the derived domain-primary mention set."
  );
}

// Strict checks on one unresolved entry's `mention_ids`: array, non-empty strings,
// sorted, unique, known mentions in the entry's segment, and containing `mentionId`.
function checkStrictUnresolvedMentionIds(item, mentionId, segmentId, mentionById) {
  const refs = Array.isArray(item && item.mention_ids) ? item.mention_ids : null;
  if (refs === null) {
    failValidation(
      "EA_VALIDATE_STRICT_UNRESOLVED_MENTION_IDS",
      "Strict validation error: coverage.unresolved[*].mention_ids must be an array."
    );
  }

  const seen = new Set();
  let previous = null;
  for (const ref of refs) {
    const isId = typeof ref === "string" && ref.length > 0;
    if (!isId) {
      failValidation(
        "EA_VALIDATE_STRICT_UNRESOLVED_MENTION_IDS",
        "Strict validation error: coverage.unresolved[*].mention_ids must contain non-empty string ids."
      );
    }
    if (previous !== null && previous.localeCompare(ref) > 0) {
      failValidation(
        "EA_VALIDATE_STRICT_UNRESOLVED_MENTION_IDS",
        "Strict validation error: coverage.unresolved[*].mention_ids must be sorted."
      );
    }
    if (seen.has(ref)) {
      failValidation(
        "EA_VALIDATE_STRICT_UNRESOLVED_MENTION_IDS",
        "Strict validation error: coverage.unresolved[*].mention_ids must be unique."
      );
    }
    const referenced = mentionById.get(ref);
    if (!referenced) {
      failValidation(
        "EA_VALIDATE_STRICT_UNRESOLVED_MENTION_REFERENCE",
        "Strict validation error: coverage.unresolved[*].mention_ids must reference existing mentions."
      );
    }
    if (referenced.segment_id !== segmentId) {
      failUnresolvedSegmentMismatch();
    }
    seen.add(ref);
    previous = ref;
  }

  if (mentionId && !seen.has(mentionId)) {
    failValidation(
      "EA_VALIDATE_STRICT_UNRESOLVED_MENTION_IDS",
      "Strict validation error: coverage.unresolved[*].mention_ids must include mention_id."
    );
  }
}

// Strict checks on one unresolved entry's `evidence.token_ids`: non-empty strings,
// known tokens in the entry's segment, and sorted.
function checkStrictUnresolvedTokenIds(rawTokenIds, segmentId, tokenById) {
  const tokenIds = Array.isArray(rawTokenIds) ? rawTokenIds : [];
  let previous = null;
  for (const tokenId of tokenIds) {
    const isId = typeof tokenId === "string" && tokenId.length > 0;
    if (!isId) {
      failValidation(
        "EA_VALIDATE_STRICT_UNRESOLVED_EVIDENCE_TOKEN_IDS",
        "Strict validation error: coverage.unresolved[*].evidence.token_ids must contain non-empty string token ids."
      );
    }
    const token = tokenById.get(tokenId);
    if (!token) {
      failValidation(
        "EA_VALIDATE_STRICT_UNRESOLVED_EVIDENCE_TOKEN_REFERENCE",
        "Strict validation error: coverage.unresolved[*].evidence.token_ids must reference existing tokens."
      );
    }
    if (token.segment_id !== segmentId) {
      failUnresolvedSegmentMismatch();
    }
    if (previous !== null && previous.localeCompare(tokenId) > 0) {
      failValidation(
        "EA_VALIDATE_STRICT_UNRESOLVED_EVIDENCE_TOKEN_IDS",
        "Strict validation error: coverage.unresolved[*].evidence.token_ids must be sorted."
      );
    }
    previous = tokenId;
  }
}

// Strict checks on one unresolved entry's `evidence.upstream_relation_ids`:
// non-empty strings and sorted.
function checkStrictUnresolvedUpstreamRelationIds(rawRelationIds) {
  const relationIds = Array.isArray(rawRelationIds) ? rawRelationIds : [];
  let previous = null;
  for (const relationId of relationIds) {
    const isId = typeof relationId === "string" && relationId.length > 0;
    if (!isId) {
      failValidation(
        "EA_VALIDATE_STRICT_UNRESOLVED_UPSTREAM_RELATION_IDS",
        "Strict validation error: coverage.unresolved[*].evidence.upstream_relation_ids must contain non-empty string ids."
      );
    }
    if (previous !== null && previous.localeCompare(relationId) > 0) {
      failValidation(
        "EA_VALIDATE_STRICT_UNRESOLVED_UPSTREAM_RELATION_IDS",
        "Strict validation error: coverage.unresolved[*].evidence.upstream_relation_ids must be sorted."
      );
    }
    previous = relationId;
  }
}

/**
 * Validate the `coverage` section of a document.
 *
 * Always checked:
 *   - the three id lists are sorted (via `ensureSortedStrings`);
 *   - covered and uncovered ids are all primary ids;
 *   - every primary id is in exactly one of covered / uncovered;
 *   - `coverage.unresolved` references known mentions, has no duplicate
 *     `mention_id`, has as many entries as there are uncovered ids, and each
 *     of its mentions is an uncovered id.
 *
 * With `options.strict`, each unresolved entry's segment, `mention_ids`,
 * `evidence.token_ids` and `evidence.upstream_relation_ids` are also checked,
 * and `primary_mention_ids` must equal the set derived by
 * `buildExpectedCoveragePrimarySet`.
 *
 * Failures are reported through `failValidation`; the code dereferences values
 * right after reporting, so it presumably throws (confirm in ./errors).
 *
 * @param {object} doc - Document holding `coverage`, `tokens` and `mentions`.
 * @param {Map<string, object>} mentionById - Mention lookup by id.
 * @param {{strict?: boolean}} [options={}] - Enables the strict checks.
 * @returns {void}
 */
function validateCoverage(doc, mentionById, options = {}) {
  const listOrEmpty = (value) => (Array.isArray(value) ? value : []);

  const coverage = doc.coverage || {};
  const primary = listOrEmpty(coverage.primary_mention_ids);
  const covered = listOrEmpty(coverage.covered_primary_mention_ids);
  const uncovered = listOrEmpty(coverage.uncovered_primary_mention_ids);
  const unresolved = listOrEmpty(coverage.unresolved);
  const tokenById = new Map((doc.tokens || []).map((entry) => [entry && entry.id, entry]));

  const sortedLists = [
    [primary, "coverage.primary_mention_ids must be sorted for determinism."],
    [covered, "coverage.covered_primary_mention_ids must be sorted for determinism."],
    [uncovered, "coverage.uncovered_primary_mention_ids must be sorted for determinism."],
  ];
  for (const [ids, message] of sortedLists) {
    ensureSortedStrings(ids, message);
  }

  const primarySet = new Set(primary);
  const coveredSet = new Set(covered);
  const uncoveredSet = new Set(uncovered);

  for (const id of coveredSet) {
    if (primarySet.has(id)) continue;
    failValidation("EA_VALIDATE_COVERAGE_NON_PRIMARY_COVERED", `Integrity error: coverage.covered_primary_mention_ids contains non-primary mention ${id}.`);
  }
  for (const id of uncoveredSet) {
    if (primarySet.has(id)) continue;
    failValidation("EA_VALIDATE_COVERAGE_NON_PRIMARY_UNCOVERED", `Integrity error: coverage.uncovered_primary_mention_ids contains non-primary mention ${id}.`);
  }
  for (const id of primarySet) {
    // Exactly one membership means the two flags differ.
    if (coveredSet.has(id) !== uncoveredSet.has(id)) continue;
    failValidation("EA_VALIDATE_COVERAGE_PARTITION", `Integrity error: primary mention ${id} must appear in exactly one of covered or uncovered.`);
  }

  const unresolvedMentionIds = [];
  for (const item of unresolved) {
    const isKnown = Boolean(item) && typeof item.mention_id === "string" && mentionById.has(item.mention_id);
    if (!isKnown) {
      failValidation("EA_VALIDATE_COVERAGE_UNKNOWN_UNRESOLVED_MENTION", "Integrity error: coverage.unresolved references unknown mention.");
    }
    unresolvedMentionIds.push(item.mention_id);
  }
  const distinctUnresolved = new Set(unresolvedMentionIds);
  if (distinctUnresolved.size !== unresolvedMentionIds.length) {
    failValidation("EA_VALIDATE_COVERAGE_DUPLICATE_UNRESOLVED", "Integrity error: coverage.unresolved contains duplicate mention_id entries.");
  }
  if (unresolvedMentionIds.length !== uncoveredSet.size) {
    failValidation("EA_VALIDATE_COVERAGE_UNRESOLVED_LENGTH", "Integrity error: coverage.unresolved length must match uncovered_primary_mention_ids length.");
  }
  for (const mentionId of unresolvedMentionIds) {
    if (uncoveredSet.has(mentionId)) continue;
    failValidation("EA_VALIDATE_COVERAGE_UNRESOLVED_MEMBERSHIP", `Integrity error: coverage.unresolved mention ${mentionId} must be in uncovered_primary_mention_ids.`);
  }

  if (!options || !options.strict) {
    return;
  }

  for (const item of unresolved) {
    const mentionId = String((item && item.mention_id) || "");
    const segmentId = String((item && item.segment_id) || "");

    const owner = mentionById.get(mentionId);
    if (owner && owner.segment_id !== segmentId) {
      failUnresolvedSegmentMismatch();
    }

    checkStrictUnresolvedMentionIds(item, mentionId, segmentId, mentionById);

    const evidence = item && item.evidence && typeof item.evidence === "object" ? item.evidence : {};
    checkStrictUnresolvedTokenIds(evidence.token_ids, segmentId, tokenById);
    checkStrictUnresolvedUpstreamRelationIds(evidence.upstream_relation_ids);
  }

  // Equal size plus "every expected id is declared" establishes set equality.
  const expectedPrimary = buildExpectedCoveragePrimarySet(doc);
  if (expectedPrimary.size !== primarySet.size) {
    failStrictPrimarySetMismatch();
  }
  for (const mentionId of expectedPrimary) {
    if (primarySet.has(mentionId)) continue;
    failStrictPrimarySetMismatch();
  }
}
|
|
212
|
+
|
|
213
|
+
// Only the coverage validator is exposed; the helper predicates stay private.
module.exports = { validateCoverage };
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
const { failValidation } = require("./errors");
|
|
2
|
+
|
|
3
|
+
/**
 * Tell whether a list is in non-decreasing order when its elements are
 * stringified and compared with `localeCompare`.
 *
 * @param {Array} arr - List to inspect.
 * @returns {boolean} False as soon as an element sorts before its predecessor.
 */
function isSortedStrings(arr) {
  let index = 1;
  while (index < arr.length) {
    const left = String(arr[index - 1]);
    const right = String(arr[index]);
    if (left.localeCompare(right) > 0) {
      return false;
    }
    index += 1;
  }
  return true;
}
|
|
9
|
+
|
|
10
|
+
/**
 * Report a determinism failure when a list is not sorted.
 *
 * A missing list (null/undefined/other falsy value) is treated as empty.
 *
 * @param {Array} arr - List expected to satisfy `isSortedStrings`.
 * @param {string} message - Detail appended after "Integrity error: ".
 * @returns {void}
 */
function ensureSortedStrings(arr, message) {
  const values = arr || [];
  if (isSortedStrings(values)) {
    return;
  }
  failValidation("EA_VALIDATE_DETERMINISM_SORT", `Integrity error: ${message}`);
}
|
|
15
|
+
|
|
16
|
+
/**
 * Build the string key used to compare relation-evidence entries.
 *
 * The key joins, with "|": from_token_id, to_token_id, label, relation_id (or
 * annotation_id when relation_id is falsy) and the JSON form of the entry.
 * Missing or falsy fields contribute an empty string.
 *
 * @param {object|null|undefined} ev - Evidence entry.
 * @returns {string} The composite sort key.
 */
function relationEvidenceSortKey(ev) {
  const source = ev || {};
  const textOf = (value) => String(value || "");

  const parts = [
    textOf(source.from_token_id),
    textOf(source.to_token_id),
    textOf(source.label),
    textOf(source.relation_id || source.annotation_id),
    JSON.stringify(source),
  ];
  return parts.join("|");
}
|
|
25
|
+
|
|
26
|
+
/**
 * Rank an argument role for ordering purposes.
 *
 * @param {*} role - Role name; falsy values are treated as the empty string.
 * @returns {number} 0..5 for actor, patient, location, theme, attribute and
 *   topic (in that order); 10 for anything else.
 */
function argumentRolePriority(role) {
  const rankedRoles = ["actor", "patient", "location", "theme", "attribute", "topic"];
  const position = rankedRoles.indexOf(String(role || ""));
  return position === -1 ? 10 : position;
}
|
|
36
|
+
|
|
37
|
+
/**
 * Rank a modifier role for ordering purposes.
 *
 * @param {*} role - Role name; falsy values are treated as the empty string.
 * @returns {number} 0 for "recipient", 1 for "modifier", 10 for anything else.
 */
function modifierRolePriority(role) {
  switch (String(role || "")) {
    case "recipient":
      return 0;
    case "modifier":
      return 1;
    default:
      return 10;
  }
}
|
|
43
|
+
|
|
44
|
+
/**
 * Build the string key used to compare argument/modifier entries.
 *
 * The key joins, with "|": the role priority left-padded to two characters,
 * the role name, and the JSON forms of `mention_ids`, `evidence.relation_ids`
 * and `evidence.token_ids` (each replaced by an empty array when not an array).
 *
 * @param {object|null|undefined} entry - Role entry.
 * @param {function(string): number} priorityFn - Maps a role name to its rank.
 * @returns {string} The composite sort key.
 */
function roleEntrySortKey(entry, priorityFn) {
  const arrayOrEmpty = (value) => (Array.isArray(value) ? value : []);

  const role = String((entry && entry.role) || "");
  const mentionIds = arrayOrEmpty(entry && entry.mention_ids);
  const evidence = entry && entry.evidence && typeof entry.evidence === "object" ? entry.evidence : {};
  const relationIds = arrayOrEmpty(evidence.relation_ids);
  const tokenIds = arrayOrEmpty(evidence.token_ids);
  const priority = String(priorityFn(role)).padStart(2, "0");

  return [
    priority,
    role,
    JSON.stringify(mentionIds),
    JSON.stringify(relationIds),
    JSON.stringify(tokenIds),
  ].join("|");
}
|
|
53
|
+
|
|
54
|
+
// Report `code`/`message` when any element's key sorts before its predecessor's key.
function assertAdjacentKeyOrder(items, keyOf, code, message) {
  for (let i = 1; i < items.length; i += 1) {
    const prev = keyOf(items[i - 1]);
    const cur = keyOf(items[i]);
    if (prev.localeCompare(cur) > 0) {
      failValidation(code, message);
    }
  }
}

// Check that the id lists inside each argument/modifier entry are sorted.
// `kind` is the property name ("arguments" or "modifiers") used in messages.
function assertRoleEntryListsSorted(entries, kind, assertionId) {
  for (const entry of entries) {
    const mids = Array.isArray(entry && entry.mention_ids) ? entry.mention_ids : [];
    ensureSortedStrings(mids, `assertion ${assertionId} ${kind}[*].mention_ids must be sorted for determinism.`);
    const entryEvidence = entry && entry.evidence && typeof entry.evidence === "object" ? entry.evidence : {};
    ensureSortedStrings(entryEvidence.relation_ids || [], `assertion ${assertionId} ${kind}[*].evidence.relation_ids must be sorted for determinism.`);
    ensureSortedStrings(entryEvidence.token_ids || [], `assertion ${assertionId} ${kind}[*].evidence.token_ids must be sorted for determinism.`);
  }
}

/**
 * Check that every ordered list inside an assertion is in its expected order.
 *
 * Checked, in this order: `evidence.token_ids`, `evidence.relation_evidence`,
 * the id lists inside each argument and modifier, the order of `arguments`
 * and of `modifiers` themselves, and each operator's `evidence` list.
 *
 * Missing (or non-array) `arguments`, `modifiers` and `operators` are treated
 * as empty lists throughout. Previously the ordering checks read
 * `assertion.arguments.length` / `assertion.modifiers.length` directly and
 * crashed with a TypeError when a list was absent.
 *
 * Failures are reported through `failValidation` / `ensureSortedStrings`.
 *
 * @param {object} assertion - Assertion to inspect.
 * @param {string} assertionId - Identifier interpolated into error messages.
 * @returns {void}
 */
function validateAssertionDeterminism(assertion, assertionId) {
  const listOrEmpty = (value) => (Array.isArray(value) ? value : []);

  const evidence = assertion && assertion.evidence && typeof assertion.evidence === "object" ? assertion.evidence : {};
  const argumentEntries = listOrEmpty(assertion && assertion.arguments);
  const modifierEntries = listOrEmpty(assertion && assertion.modifiers);
  const operators = listOrEmpty(assertion && assertion.operators);

  ensureSortedStrings(listOrEmpty(evidence.token_ids), `assertion ${assertionId} evidence.token_ids must be sorted for determinism.`);

  assertAdjacentKeyOrder(
    listOrEmpty(evidence.relation_evidence),
    relationEvidenceSortKey,
    "EA_VALIDATE_DETERMINISM_RELATION_EVIDENCE_ORDER",
    `Integrity error: assertion ${assertionId} evidence.relation_evidence must be sorted for determinism.`
  );

  assertRoleEntryListsSorted(argumentEntries, "arguments", assertionId);
  assertRoleEntryListsSorted(modifierEntries, "modifiers", assertionId);

  assertAdjacentKeyOrder(
    argumentEntries,
    (entry) => roleEntrySortKey(entry, argumentRolePriority),
    "EA_VALIDATE_DETERMINISM_ARGUMENT_ORDER",
    `Integrity error: assertion ${assertionId} arguments must be sorted for determinism.`
  );
  assertAdjacentKeyOrder(
    modifierEntries,
    (entry) => roleEntrySortKey(entry, modifierRolePriority),
    "EA_VALIDATE_DETERMINISM_MODIFIER_ORDER",
    `Integrity error: assertion ${assertionId} modifiers must be sorted for determinism.`
  );

  for (const op of operators) {
    assertAdjacentKeyOrder(
      listOrEmpty(op && op.evidence),
      relationEvidenceSortKey,
      "EA_VALIDATE_DETERMINISM_OPERATOR_EVIDENCE_ORDER",
      `Integrity error: assertion ${assertionId} operators[*].evidence must be sorted for determinism.`
    );
  }
}
|
|
110
|
+
|
|
111
|
+
// Determinism helpers shared with the other validators.
module.exports = { ensureSortedStrings, relationEvidenceSortKey, validateAssertionDeterminism };
|