elementary-assertions 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +353 -0
- package/LICENSE +21 -0
- package/README.md +211 -0
- package/bin/elementary-assertions.js +8 -0
- package/docs/DEV_TOOLING.md +98 -0
- package/docs/NPM_RELEASE.md +177 -0
- package/docs/OPERATIONAL.md +159 -0
- package/docs/RELEASE_NOTES_TEMPLATE.md +37 -0
- package/docs/REPO_WORKFLOWS.md +48 -0
- package/package.json +46 -0
- package/src/core/accepted-annotations.js +44 -0
- package/src/core/assertions.js +2304 -0
- package/src/core/determinism.js +95 -0
- package/src/core/diagnostics.js +496 -0
- package/src/core/ids.js +9 -0
- package/src/core/mention-builder.js +272 -0
- package/src/core/mention-evidence.js +52 -0
- package/src/core/mention-head-resolution.js +108 -0
- package/src/core/mention-materialization.js +31 -0
- package/src/core/mentions.js +149 -0
- package/src/core/output.js +296 -0
- package/src/core/projection.js +192 -0
- package/src/core/roles.js +164 -0
- package/src/core/strings.js +7 -0
- package/src/core/tokens.js +53 -0
- package/src/core/upstream.js +31 -0
- package/src/index.js +6 -0
- package/src/render/index.js +5 -0
- package/src/render/layouts/compact.js +10 -0
- package/src/render/layouts/meaning.js +7 -0
- package/src/render/layouts/readable.js +7 -0
- package/src/render/layouts/table.js +7 -0
- package/src/render/render.js +931 -0
- package/src/run.js +278 -0
- package/src/schema/seed.elementary-assertions.schema.json +1751 -0
- package/src/tools/cli.js +158 -0
- package/src/tools/index.js +6 -0
- package/src/tools/io.js +55 -0
- package/src/validate/ajv.js +20 -0
- package/src/validate/coverage.js +215 -0
- package/src/validate/determinism.js +115 -0
- package/src/validate/diagnostics-strict.js +392 -0
- package/src/validate/errors.js +19 -0
- package/src/validate/index.js +20 -0
- package/src/validate/integrity.js +41 -0
- package/src/validate/invariants.js +157 -0
- package/src/validate/references.js +110 -0
- package/src/validate/schema.js +50 -0
package/src/run.js
ADDED
|
@@ -0,0 +1,278 @@
|
|
|
1
|
+
const { sha256Hex } = require("./core/determinism");
|
|
2
|
+
const { buildTokenIndex, buildTokenWikiById } = require("./core/tokens");
|
|
3
|
+
const { buildAcceptedAnnotationsInventory, buildMentions } = require("./core/mentions");
|
|
4
|
+
const { collectStep11Relations, buildProjectedRelations } = require("./core/projection");
|
|
5
|
+
const { buildAssertions } = require("./core/assertions");
|
|
6
|
+
const { buildUnresolved, buildDiagnostics } = require("./core/diagnostics");
|
|
7
|
+
const { buildWikiTitleEvidenceFromUpstream, buildCoverageDomainMentionIds, buildOutput } = require("./core/output");
|
|
8
|
+
const { hasPositiveWikiSignal } = require("./core/mentions");
|
|
9
|
+
const { normalizeOptionalString } = require("./core/strings");
|
|
10
|
+
const { rejectLegacySlots } = require("./validate/schema");
|
|
11
|
+
|
|
12
|
+
/**
 * Resolve the timeout used for the wikipedia-title-index health check.
 *
 * @param {*} timeoutMs - Caller-supplied timeout in milliseconds.
 * @returns {number} `timeoutMs` when it is a finite positive number (capped at
 *   the largest delay a timer accepts), otherwise the 2000 ms default.
 */
function effectiveWtiTimeoutMs(timeoutMs) {
  const DEFAULT_TIMEOUT_MS = 2000;
  // Timers replace any delay above 2^31 - 1 ms with 1 ms, so an over-large
  // budget would abort the health check almost immediately instead of waiting.
  const MAX_TIMER_DELAY_MS = 2147483647;

  if (!Number.isFinite(timeoutMs) || timeoutMs <= 0) return DEFAULT_TIMEOUT_MS;
  return Math.min(timeoutMs, MAX_TIMER_DELAY_MS);
}
|
|
15
|
+
|
|
16
|
+
/**
 * Probe the wikipedia-title-index service by requesting `<endpoint>/health`.
 *
 * @param {string} endpoint - Base URL of the service; trailing slashes are ignored.
 * @param {number} [timeoutMs] - Time budget, resolved through effectiveWtiTimeoutMs.
 * @returns {Promise<void>} Resolves once the service answers with HTTP 200.
 * @throws {Error} When no endpoint is given, or when the request fails, times
 *   out, or answers with a non-200 status. Health-check failures carry the
 *   underlying error as `cause`.
 */
async function ensureWtiEndpointReachable(endpoint, timeoutMs) {
  const normalized = normalizeOptionalString(endpoint);
  if (!normalized) throw new Error("WTI endpoint is required for runElementaryAssertions.");

  // Strip every trailing slash so "host//" does not yield "host//health".
  const url = `${normalized.replace(/\/+$/, "")}/health`;
  const budgetMs = effectiveWtiTimeoutMs(timeoutMs);
  const controller = new AbortController();
  // Remember that the abort came from our own timer so the failure can say
  // "timed out" instead of the generic abort message.
  let timedOut = false;
  const timer = setTimeout(() => {
    timedOut = true;
    controller.abort();
  }, budgetMs);

  try {
    const response = await fetch(url, { method: "GET", signal: controller.signal });
    if (response.status !== 200) {
      throw new Error(`HTTP ${response.status}`);
    }
  } catch (err) {
    let detail;
    if (timedOut) {
      detail = `timed out after ${budgetMs}ms`;
    } else {
      detail = err && err.message ? err.message : String(err);
    }
    // Keep the original error reachable for callers that need its stack or code.
    throw new Error(`wikipedia-title-index health check failed for ${url}: ${detail}`, { cause: err });
  } finally {
    // Always release the timer so it cannot keep the process alive.
    clearTimeout(timer);
  }
}
|
|
35
|
+
|
|
36
|
+
/**
 * Validate the minimal shape of a relations document before it is processed.
 *
 * Checks that the document is an object with `tokens[]`, `annotations[]`,
 * `segments[]` and a string `canonical_text`, then verifies that every accepted
 * annotation only refers to token ids present in `tokens[]` and only carries
 * well-formed text spans. Finally delegates to rejectLegacySlots.
 *
 * @param {object} relationsDoc - Candidate relations document.
 * @returns {void}
 * @throws {Error} On the first structural problem found.
 */
function validateRelationsInput(relationsDoc) {
  if (!relationsDoc || typeof relationsDoc !== "object") {
    throw new Error("runFromRelations requires an object input document.");
  }
  if (!Array.isArray(relationsDoc.tokens)) {
    throw new Error("runFromRelations input must include tokens[].");
  }
  if (!Array.isArray(relationsDoc.annotations)) {
    throw new Error("runFromRelations input must include annotations[].");
  }
  if (!Array.isArray(relationsDoc.segments)) {
    throw new Error("runFromRelations input must include segments[].");
  }
  if (typeof relationsDoc.canonical_text !== "string") {
    throw new Error("runFromRelations input must include canonical_text.");
  }

  // Only non-empty string ids count as known tokens.
  const tokenIds = new Set();
  for (const token of relationsDoc.tokens) {
    if (token && typeof token.id === "string" && token.id.length > 0) tokenIds.add(token.id);
  }

  for (const annotation of relationsDoc.annotations) {
    if (!annotation || typeof annotation !== "object") continue;
    // Annotations that are not accepted are not validated here.
    if (annotation.status !== "accepted") continue;

    if (annotation.kind === "dependency") {
      assertDependencyEndpointsKnown(annotation, tokenIds);
    }
    assertAnchorSelectorsValid(annotation, tokenIds);
  }

  rejectLegacySlots(relationsDoc);
}

/** Ensure an accepted dependency annotation names two known tokens as head and dep. */
function assertDependencyEndpointsKnown(annotation, tokenIds) {
  const headId = annotation.head && typeof annotation.head.id === "string" ? annotation.head.id : "";
  const depId = annotation.dep && typeof annotation.dep.id === "string" ? annotation.dep.id : "";
  if (!headId || !depId) {
    throw new Error("runFromRelations accepted dependency annotation is missing head.id or dep.id.");
  }
  if (!tokenIds.has(headId) || !tokenIds.has(depId)) {
    throw new Error("runFromRelations accepted dependency annotation references unknown token id.");
  }
}

/** Ensure the anchor selectors of an accepted annotation use known tokens and sane spans. */
function assertAnchorSelectorsValid(annotation, tokenIds) {
  const selectors =
    annotation.anchor && Array.isArray(annotation.anchor.selectors) ? annotation.anchor.selectors : [];

  for (const selector of selectors) {
    if (!selector || typeof selector !== "object") continue;

    if (selector.type === "TokenSelector" && Array.isArray(selector.token_ids)) {
      for (const tokenId of selector.token_ids) {
        if (!tokenIds.has(tokenId)) {
          throw new Error("runFromRelations accepted annotation TokenSelector references unknown token id.");
        }
      }
    }

    if (selector.type === "TextPositionSelector" && selector.span && typeof selector.span === "object") {
      const { start, end } = selector.span;
      // Number.isFinite also rejects NaN and Infinity, which a plain
      // typeof check lets through because `NaN > end` is always false.
      if (!Number.isFinite(start) || !Number.isFinite(end) || start > end) {
        throw new Error("runFromRelations accepted annotation TextPositionSelector has invalid span.");
      }
    }
  }
}
|
|
96
|
+
|
|
97
|
+
/**
 * Read the schema version declared on a relations document.
 *
 * @param {object} relationsDoc - Relations document (must not be null/undefined).
 * @returns {string|undefined} The `schema_version` value when it is a
 *   non-empty string, otherwise `undefined`.
 */
function schemaVersionFromRelations(relationsDoc) {
  const declared = relationsDoc.schema_version;
  if (typeof declared !== "string") return undefined;
  if (declared.length === 0) return undefined;
  return declared;
}
|
|
102
|
+
|
|
103
|
+
/**
 * Require that the upstream relations seed carries wikipedia_title_index evidence.
 *
 * Walks `relationsSeed.tokens`, looking at each token whose `lexicon` object
 * owns a `wikipedia_title_index` entry. Every such entry must be a non-array
 * object, and at least one must satisfy hasPositiveWikiSignal.
 *
 * @param {object} relationsSeed - Relations document produced upstream.
 * @returns {void}
 * @throws {Error} When an entry is malformed or no entry is positive.
 */
function assertMandatoryWtiUpstreamEvidence(relationsSeed) {
  const missingEvidenceMessage =
    "WTI evidence missing: linguistic-enricher produced no positive wikipedia_title_index signals.";
  const tokenList = relationsSeed && Array.isArray(relationsSeed.tokens) ? relationsSeed.tokens : [];

  // A positive entry implies at least one entry exists, so one flag is enough.
  let sawPositiveSignal = false;
  for (const entry of tokenList) {
    const lexicon = entry ? entry.lexicon : null;
    if (!lexicon || typeof lexicon !== "object") continue;
    if (!Object.prototype.hasOwnProperty.call(lexicon, "wikipedia_title_index")) continue;

    const carrier = lexicon.wikipedia_title_index;
    const isObjectCarrier = Boolean(carrier) && typeof carrier === "object" && !Array.isArray(carrier);
    if (!isObjectCarrier) {
      throw new Error(missingEvidenceMessage);
    }
    if (hasPositiveWikiSignal(carrier)) {
      sawPositiveSignal = true;
    }
  }

  if (!sawPositiveSignal) {
    throw new Error(missingEvidenceMessage);
  }
}
|
|
121
|
+
|
|
122
|
+
/**
 * Summarise a pipeline run for inclusion in the output document.
 *
 * @param {object} relationsSeed - Relations document the run was based on.
 * @param {object} runOptions - Options of the run; only `target` is read.
 * @param {string} wtiEndpoint - wikipedia-title-index endpoint, if any.
 * @returns {{target: string, relations_extracted_digest: string, token_count: number,
 *   annotation_count: number, wikipedia_title_index_configured: boolean}}
 */
function buildRunPipelineTrace(relationsSeed, runOptions, wtiEndpoint) {
  const seed = relationsSeed || {};
  const lengthOrZero = (candidate) => (Array.isArray(candidate) ? candidate.length : 0);

  let target = "";
  if (runOptions && runOptions.target) {
    target = runOptions.target;
  }

  return {
    target: String(target),
    // Digest of the JSON serialisation of the seed (an empty object when absent).
    relations_extracted_digest: sha256Hex(JSON.stringify(seed)),
    token_count: lengthOrZero(seed.tokens),
    annotation_count: lengthOrZero(seed.annotations),
    wikipedia_title_index_configured: Boolean(normalizeOptionalString(wtiEndpoint)),
  };
}
|
|
131
|
+
|
|
132
|
+
/**
 * Describe a source input that exists only in memory.
 *
 * @param {string} artifact - Name of the artifact.
 * @param {string} digest - Digest identifying the artifact content.
 * @returns {{artifact: string, digest: string, origin: {kind: string}}}
 */
function inMemorySourceInput(artifact, digest) {
  const origin = { kind: "in_memory" };
  return { artifact, digest, origin };
}
|
|
139
|
+
|
|
140
|
+
/**
 * Build the elementary-assertions output from an existing relations document.
 *
 * The document is validated first, then passed through the builders in a fixed
 * order: token indexes, accepted-annotation inventory, step relations,
 * mentions, projected relations, assertions, coverage, unresolved items, wiki
 * title evidence, diagnostics and finally the output document.
 *
 * @param {object} relationsDoc - Relations document; see validateRelationsInput.
 * @param {object} [options] - Optional settings; `null` is treated as "no options".
 * @param {Array} [options.sourceInputs] - Source-input records to list in the output (copied).
 * @param {boolean} [options.suppressDefaultRelationsSource] - When truthy, the
 *   default in-memory relations source record is not appended.
 * @param {string} [options.wtiEndpoint] - wikipedia-title-index endpoint, passed
 *   on to the diagnostics and the pipeline trace.
 * @returns {*} Whatever buildOutput returns.
 * @throws {Error} When validateRelationsInput rejects the document.
 */
function runFromRelations(relationsDoc, options = {}) {
  validateRelationsInput(relationsDoc);

  // The default parameter only covers `undefined`; an explicit null would
  // otherwise crash on the first property read below.
  const settings = options || {};

  const relationsSeed = relationsDoc;
  const schemaVersion = schemaVersionFromRelations(relationsSeed);
  const tokenById = buildTokenIndex(relationsSeed);
  const tokenWikiById = buildTokenWikiById(relationsSeed);
  const acceptedAnnotations = buildAcceptedAnnotationsInventory(relationsSeed);
  const stepRelations = collectStep11Relations(relationsSeed, tokenById);

  // Split the accepted annotations into the two subsets buildMentions expects.
  const allAnnotations = Array.isArray(relationsSeed.annotations) ? relationsSeed.annotations : [];
  const mweSeed = { annotations: allAnnotations.filter((a) => a && a.kind === "mwe" && a.status === "accepted") };
  const headsSeed = {
    annotations: allAnnotations.filter((a) => a && a.status === "accepted" && (a.kind === "chunk" || a.kind === "chunk_head")),
  };

  const mentionBuild = buildMentions({
    relationsSeed,
    mweSeed,
    headsSeed,
    tokenById,
    tokenWikiById,
  });

  const mentionById = new Map(mentionBuild.mentions.map((m) => [m.id, m]));
  const projectedBuild = buildProjectedRelations(
    stepRelations,
    mentionBuild.tokenToPrimaryMention,
    mentionBuild.tokenToAllMentions,
    mentionById,
    tokenById
  );
  const assertionBuild = buildAssertions({
    projected: projectedBuild.projected,
    mentionById,
    tokenById,
  });

  // Mentions in the coverage domain that no assertion covers.
  const coveragePrimaryMentionIds = buildCoverageDomainMentionIds(mentionBuild.mentions, tokenById);
  const uncoveredPrimaryMentionIds = coveragePrimaryMentionIds.filter((id) => !assertionBuild.coveredMentions.has(id));

  const unresolved = buildUnresolved({
    mentions: mentionBuild.mentions,
    unresolvedHeadMap: mentionBuild.unresolvedHeadMap,
    projectedUnresolved: projectedBuild.unresolved,
    mentionById,
    assertions: assertionBuild.assertions,
    projected: projectedBuild.projected,
    uncoveredPrimaryMentionIds,
  });

  // Copy the caller's list so appending the default record never mutates it.
  const sourceInputs = Array.isArray(settings.sourceInputs) ? settings.sourceInputs.slice() : [];
  if (!settings.suppressDefaultRelationsSource) {
    sourceInputs.push(inMemorySourceInput("relations_extracted.in_memory", sha256Hex(JSON.stringify(relationsSeed || {}))));
  }

  const wikiTitleEvidence = buildWikiTitleEvidenceFromUpstream({
    mentions: mentionBuild.mentions,
    assertions: assertionBuild.assertions,
    tokenById,
    canonicalText: relationsSeed.canonical_text,
  });

  const wtiEndpoint = normalizeOptionalString(settings.wtiEndpoint);
  const diagnostics = buildDiagnostics({
    tokenWikiById,
    mentions: mentionBuild.mentions,
    assertions: assertionBuild.assertions,
    projectedBuild,
    relationsSeed,
    wtiEndpoint,
    suppressedAssertions: assertionBuild.suppressedAssertions,
  });

  return buildOutput({
    schemaVersion,
    relationsSeed,
    mentions: mentionBuild.mentions,
    assertions: assertionBuild.assertions,
    coveredMentions: assertionBuild.coveredMentions,
    unresolved,
    sourceInputs,
    pipelineTrace: buildRunPipelineTrace(relationsSeed, { target: "relations_extracted" }, wtiEndpoint),
    acceptedAnnotations,
    diagnostics,
    projectedBuild,
    wikiTitleEvidence,
  });
}
|
|
228
|
+
|
|
229
|
+
/**
 * Run the full pipeline on raw text: check the wikipedia-title-index service,
 * obtain a relations document from linguistic-enricher, then hand it to
 * runFromRelations.
 *
 * @param {string} text - Non-empty input text.
 * @param {object} [options]
 * @param {object} options.services - Must contain
 *   `services["wikipedia-title-index"].endpoint`.
 * @param {number} [options.wtiTimeoutMs] - Budget for the health check.
 * @param {number} [options.timeoutMs] - Forwarded to linguistic-enricher when
 *   it is a finite positive number.
 * @param {Array} [options.sourceInputs] - Source-input records; when absent or
 *   empty, a single in-memory record for `text` is used.
 * @returns {Promise<*>} The result of runFromRelations.
 * @throws {Error} On empty text, a missing endpoint, a failed health check, a
 *   linguistic-enricher load failure (original error attached as `cause`), or
 *   missing upstream wikipedia_title_index evidence.
 */
async function runElementaryAssertions(text, options = {}) {
  if (typeof text !== "string" || text.length === 0) {
    throw new Error("runElementaryAssertions requires non-empty text.");
  }

  const wtiEndpoint = normalizeOptionalString(
    options && options.services && options.services["wikipedia-title-index"]
      ? options.services["wikipedia-title-index"].endpoint
      : ""
  );

  if (!wtiEndpoint) {
    throw new Error("runElementaryAssertions requires options.services['wikipedia-title-index'].endpoint.");
  }

  await ensureWtiEndpointReachable(wtiEndpoint, options.wtiTimeoutMs);

  // Loaded lazily so callers that only use runFromRelations do not need the
  // dependency installed.
  let linguisticEnricher;
  try {
    linguisticEnricher = require("linguistic-enricher");
  } catch (err) {
    // Keep the real failure attached: a broken install or a missing
    // transitive dependency is otherwise indistinguishable from "not installed".
    throw new Error(
      "Unable to load linguistic-enricher. Install it in the project root (npm i linguistic-enricher).",
      { cause: err }
    );
  }

  const runOptions = {
    target: "relations_extracted",
    services: { "wikipedia-title-index": { endpoint: wtiEndpoint } },
  };
  if (Number.isFinite(options.timeoutMs) && options.timeoutMs > 0) {
    runOptions.timeoutMs = options.timeoutMs;
  }

  const relationsSeed = await linguisticEnricher.runPipeline(text, runOptions);
  assertMandatoryWtiUpstreamEvidence(relationsSeed);

  return runFromRelations(relationsSeed, {
    sourceInputs: Array.isArray(options.sourceInputs) && options.sourceInputs.length > 0
      ? options.sourceInputs.slice()
      : [inMemorySourceInput("seed.text.in_memory", sha256Hex(text))],
    wtiEndpoint,
  });
}
|
|
271
|
+
|
|
272
|
+
// Public surface of this module: the two pipeline entry points plus the
// helpers re-exported alongside them.
const runModuleApi = {
  runFromRelations,
  runElementaryAssertions,
  normalizeOptionalString,
  ensureWtiEndpointReachable,
  assertMandatoryWtiUpstreamEvidence,
};

module.exports = runModuleApi;
|