@agenr/openclaw-plugin 3.3.0 → 2026.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/build-before-turn-artifact-NPUHVWFE.js +71 -0
- package/dist/build-recall-artifact-F3LS3PZX.js +62 -0
- package/dist/chunk-5AXMFBHR.js +14 -0
- package/dist/chunk-5AYIXQRF.js +4452 -0
- package/dist/chunk-5TIP2EPP.js +6944 -0
- package/dist/chunk-GAERET5Q.js +2070 -0
- package/dist/chunk-GF3PX3VM.js +41 -0
- package/dist/chunk-GKZQ5AG5.js +44 -0
- package/dist/chunk-IBPS64W3.js +1069 -0
- package/dist/chunk-MC3C2XM5.js +148 -0
- package/dist/chunk-NSLTJBUC.js +270 -0
- package/dist/chunk-OJSIZDZD.js +9 -0
- package/dist/chunk-OWGQWQUP.js +45 -0
- package/dist/chunk-SIY3JA7T.js +3062 -0
- package/dist/chunk-SOQW7356.js +2416 -0
- package/dist/chunk-U74RE3L7.js +3233 -0
- package/dist/chunk-VBPYU7GO.js +597 -0
- package/dist/chunk-VTHBPXDQ.js +1750 -0
- package/dist/chunk-XFJ4S4G2.js +1679 -0
- package/dist/chunk-Y5NB3FTH.js +106 -0
- package/dist/chunk-ZX55JBV2.js +4451 -0
- package/dist/index.js +1855 -19846
- package/dist/lifecycle-checkpoint-IAC5FCQU.js +154 -0
- package/dist/scan-6JKPOQHD.js +6 -0
- package/dist/service-EKFACEN6.js +15 -0
- package/dist/service-RHNB5AEQ.js +861 -0
- package/dist/sink-AUAAWC5O.js +8 -0
- package/openclaw.plugin.json +2 -11
- package/package.json +1 -1
|
@@ -0,0 +1,2416 @@
|
|
|
1
|
+
import {
|
|
2
|
+
applyClaimKeyLifecycle,
|
|
3
|
+
buildExtractedClaimKeyLifecycle,
|
|
4
|
+
buildInferredIngestClaimKeySupportContext
|
|
5
|
+
} from "./chunk-VTHBPXDQ.js";
|
|
6
|
+
import {
|
|
7
|
+
assertKeyedDurableHasLifecycle,
|
|
8
|
+
compactClaimKey,
|
|
9
|
+
describeClaimKeyNormalizationFailure,
|
|
10
|
+
describeExtractedClaimKeyRejection,
|
|
11
|
+
inspectClaimKey,
|
|
12
|
+
isTrustedClaimKeyForCleanup,
|
|
13
|
+
normalizeClaimKey,
|
|
14
|
+
normalizeClaimKeySegment,
|
|
15
|
+
validateExtractedClaimKey
|
|
16
|
+
} from "./chunk-VBPYU7GO.js";
|
|
17
|
+
|
|
18
|
+
// src/app/dreaming/concurrency.ts
|
|
19
|
+
import { randomUUID } from "crypto";
|
|
20
|
+
// Symbol key stamped (with value `true`) on every lease object built by createDreamingRunLease.
var DREAMING_RUN_LEASE_BRAND = /* @__PURE__ */ Symbol("DreamingRunLease");

// Lock key -> token of the run lock currently held inside this process.
var inProcessRunLocks = /* @__PURE__ */ new Map();

// Lock key -> number of episode writes currently in flight for that key.
var episodeWriteRefcounts = /* @__PURE__ */ new Map();

// Period of the background heartbeat timer: five minutes, in milliseconds.
var DEFAULT_LOCK_HEARTBEAT_INTERVAL_MS = 5 * 60 * 1000;

// Default total time waitForDreamingRunLock keeps retrying: one minute, in milliseconds.
var DEFAULT_LOCK_WAIT_TIMEOUT_MS = 60 * 1000;

// Default pause between two acquisition attempts, in milliseconds.
var DEFAULT_LOCK_WAIT_POLL_MS = 500;
|
|
26
|
+
/**
 * Derives the key used to index the in-process lock and refcount maps.
 *
 * @param {string | null | undefined} dbPath - Database path; surrounding whitespace is ignored.
 * @returns {string} The trimmed path, or ":memory:" when the path is missing or blank.
 */
function resolveDreamingLockKey(dbPath) {
  const normalizedPath = dbPath?.trim();
  if (!normalizedPath || !(normalizedPath.length > 0)) {
    return ":memory:";
  }
  return normalizedPath;
}
|
|
30
|
+
/**
 * Makes a single, non-blocking attempt to take the dreaming run lock.
 *
 * The in-process map is consulted first; only when this process does not
 * already hold the key is the port asked to grant the lock for a fresh token.
 *
 * @param {object} port - Object exposing `tryAcquireRunLock(token)`.
 * @param {string | null | undefined} dbPath - Database path the lock is keyed on.
 * @returns {Promise<object | null>} A lease on success, otherwise `null`.
 */
async function tryAcquireDreamingRunLock(port, dbPath) {
  const key = resolveDreamingLockKey(dbPath);
  const heldInThisProcess = inProcessRunLocks.has(key);
  if (heldInThisProcess) {
    return null;
  }

  const leaseToken = randomUUID();
  const granted = await port.tryAcquireRunLock(leaseToken);
  if (granted) {
    inProcessRunLocks.set(key, leaseToken);
    return createDreamingRunLease(port, key, leaseToken);
  }
  return null;
}
|
|
43
|
+
/**
 * Runs `fn` while holding the dreaming run lock, failing fast when the lock
 * cannot be taken on the first attempt.
 *
 * @param {object} port - Lock port handed to tryAcquireDreamingRunLock.
 * @param {string | null | undefined} dbPath - Database path the lock is keyed on.
 * @param {(lease: object) => unknown} fn - Work to run under the lock.
 * @returns {Promise<unknown>} Whatever withHeldDreamingRunLock resolves to.
 * @throws {Error} "Dreaming run already in progress." when no lease is obtained.
 */
async function withDreamingRunLock(port, dbPath, fn) {
  const lease = await tryAcquireDreamingRunLock(port, dbPath);
  if (lease) {
    return withHeldDreamingRunLock(lease, fn);
  }
  throw new Error("Dreaming run already in progress.");
}
|
|
50
|
+
/**
 * Runs `fn(lease)` under an already-acquired lease, keeping the lease alive
 * with a background heartbeat and always stopping the heartbeat and releasing
 * the lease afterwards.
 *
 * Error precedence: a failure from the initial heartbeat or from `fn` wins;
 * otherwise the first teardown failure (stopping the heartbeat, then
 * releasing) is thrown. An error value of `undefined` is treated as "no error".
 *
 * @param {object} lease - Lease exposing `heartbeat()` and `release()`.
 * @param {(lease: object) => unknown} fn - Work to run under the lock.
 * @returns {Promise<unknown>} The value produced by `fn`.
 */
async function withHeldDreamingRunLock(lease, fn) {
  const stopHeartbeat = startDreamingRunLockHeartbeat(lease);

  let value;
  let bodyFailure;
  try {
    // One immediate heartbeat before the callback starts.
    await lease.heartbeat();
    value = await fn(lease);
  } catch (error) {
    bodyFailure = error;
  }

  // Both teardown steps always run; only the first failure is remembered.
  let teardownFailure;
  const teardownSteps = [stopHeartbeat, () => lease.release()];
  for (const step of teardownSteps) {
    try {
      await step();
    } catch (error) {
      if (teardownFailure === undefined) {
        teardownFailure = error;
      }
    }
  }

  const failure = bodyFailure !== undefined ? bodyFailure : teardownFailure;
  if (failure !== undefined) {
    throw failure;
  }
  return value;
}
|
|
81
|
+
/**
 * Runs an episode write under the dreaming run lock.
 *
 * The write is counted in the episode-write refcount for the whole duration,
 * including the time spent waiting for the lock, and is uncounted again in a
 * `finally` block regardless of outcome.
 *
 * @param {object} input - Reads `dbPath`, `port`, `waitTimeoutMs` and `waitPollMs`.
 * @param {() => unknown} fn - The write to perform; called without arguments.
 * @returns {Promise<unknown>} The value produced by `fn`.
 */
async function withEpisodeWriteGuard(input, fn) {
  beginEpisodeWrite(input.dbPath);
  try {
    const waitOptions = {
      timeoutMs: input.waitTimeoutMs,
      pollMs: input.waitPollMs
    };
    const heldLease = await waitForDreamingRunLock(input.port, input.dbPath, waitOptions);
    // The lease is deliberately not forwarded to the caller's callback.
    const runWrite = async () => fn();
    return await withHeldDreamingRunLock(heldLease, runWrite);
  } finally {
    endEpisodeWrite(input.dbPath);
  }
}
|
|
93
|
+
/**
 * Increments the in-flight episode-write counter for the given database path.
 *
 * @param {string | null | undefined} dbPath - Database path the counter is keyed on.
 * @returns {void}
 */
function beginEpisodeWrite(dbPath) {
  const key = resolveDreamingLockKey(dbPath);
  const inFlight = episodeWriteRefcounts.get(key) ?? 0;
  episodeWriteRefcounts.set(key, inFlight + 1);
}
|
|
97
|
+
/**
 * Decrements the in-flight episode-write counter for the given database path,
 * removing the map entry once the count reaches zero or below.
 *
 * @param {string | null | undefined} dbPath - Database path the counter is keyed on.
 * @returns {void}
 */
function endEpisodeWrite(dbPath) {
  const key = resolveDreamingLockKey(dbPath);
  const remaining = (episodeWriteRefcounts.get(key) ?? 0) - 1;
  if (remaining <= 0) {
    episodeWriteRefcounts.delete(key);
  } else {
    episodeWriteRefcounts.set(key, remaining);
  }
}
|
|
106
|
+
/**
 * Reports whether at least one episode write is currently counted for the
 * given database path.
 *
 * @param {string | null | undefined} dbPath - Database path the counter is keyed on.
 * @returns {boolean} `true` when the stored count is greater than zero.
 */
function isEpisodeWriteInProgress(dbPath) {
  const key = resolveDreamingLockKey(dbPath);
  const inFlight = episodeWriteRefcounts.get(key) ?? 0;
  return inFlight > 0;
}
|
|
109
|
+
/**
 * Builds the lease object handed out for a successfully acquired run lock.
 *
 * @param {object} port - Object exposing `heartbeatRunLock(token)` and `releaseRunLock(token)`.
 * @param {string} lockKey - Key under which the token is stored in the in-process lock map.
 * @param {string} token - Token identifying this holder.
 * @returns {object} Lease with `token`, the brand symbol set to `true`, `heartbeat()` and `release()`.
 */
function createDreamingRunLease(port, lockKey, token) {
  let isReleased = false;
  const ownsInProcessLock = () => inProcessRunLocks.get(lockKey) === token;

  return {
    token,
    [DREAMING_RUN_LEASE_BRAND]: true,

    // No-op once released; otherwise throws when either the in-process map or
    // the port no longer recognises this token as the holder.
    async heartbeat() {
      if (isReleased) {
        return;
      }
      if (!ownsInProcessLock()) {
        throw new Error("Dreaming run lock was lost in this process.");
      }
      const stillHeld = await port.heartbeatRunLock(token);
      if (!stillHeld) {
        throw new Error("Dreaming run lock was lost.");
      }
    },

    // Only the first call does anything. The lease is flagged as released and
    // the in-process entry is dropped before the port is asked to release.
    async release() {
      if (isReleased) {
        return;
      }
      isReleased = true;
      if (ownsInProcessLock()) {
        inProcessRunLocks.delete(lockKey);
      }
      await port.releaseRunLock(token);
    }
  };
}
|
|
138
|
+
/**
 * Starts a periodic heartbeat for the lease and returns a function that stops it.
 *
 * Heartbeat failures do not surface while the timer is running; the most
 * recent one is remembered and thrown by the returned stop function.
 *
 * @param {object} lease - Lease exposing `heartbeat()`.
 * @returns {() => Promise<void>} Stop function: clears the timer, waits for the
 *   most recently started heartbeat (if any), then rethrows a recorded failure.
 */
function startDreamingRunLockHeartbeat(lease) {
  let lastFailure;
  let inFlight = null;

  const rememberFailure = (error) => {
    lastFailure = error;
  };
  const tick = () => {
    inFlight = lease.heartbeat().catch(rememberFailure);
  };

  const handle = setInterval(tick, DEFAULT_LOCK_HEARTBEAT_INTERVAL_MS);
  // Where the timer handle supports it, do not let the timer keep the process alive.
  handle.unref?.();

  return async () => {
    clearInterval(handle);
    if (inFlight) {
      await inFlight;
    }
    if (lastFailure !== undefined) {
      throw lastFailure;
    }
  };
}
|
|
157
|
+
/**
 * Polls for the dreaming run lock until it is acquired or the timeout elapses.
 *
 * @param {object} port - Lock port handed to tryAcquireDreamingRunLock.
 * @param {string | null | undefined} dbPath - Database path the lock is keyed on.
 * @param {{ timeoutMs?: number | null, pollMs?: number | null }} options - Overrides
 *   for the default wait timeout and poll interval; nullish values use the defaults.
 * @returns {Promise<object>} The acquired lease.
 * @throws {Error} When the deadline passes without obtaining the lock.
 */
async function waitForDreamingRunLock(port, dbPath, options) {
  const waitBudgetMs = options.timeoutMs ?? DEFAULT_LOCK_WAIT_TIMEOUT_MS;
  const pollIntervalMs = options.pollMs ?? DEFAULT_LOCK_WAIT_POLL_MS;
  const giveUpAt = Date.now() + waitBudgetMs;

  for (;;) {
    const lease = await tryAcquireDreamingRunLock(port, dbPath);
    if (lease) {
      return lease;
    }
    if (Date.now() >= giveUpAt) {
      throw new Error("Timed out waiting for dreaming run lock before episode write.");
    }
    // Sleep for the poll interval, shortened near the deadline but never below 1 ms.
    const remainingMs = Math.max(1, giveUpAt - Date.now());
    await sleep(Math.min(pollIntervalMs, remainingMs));
  }
}
|
|
172
|
+
/**
 * Returns a promise that resolves (with no value) after `ms` milliseconds.
 *
 * @param {number} ms - Delay passed to `setTimeout`.
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
|
|
177
|
+
|
|
178
|
+
// src/core/claim-key-entity-family.ts
|
|
179
|
+
// Common English function words.
// NOTE(review): presumably excluded by tokenizeGrounding, which is not visible here; confirm.
var ENTITY_FAMILY_GROUNDING_STOP_TOKENS = /* @__PURE__ */ new Set([
  "a", "an", "and", "are", "as", "at",
  "be", "by", "for", "from", "in", "into",
  "is", "it", "of", "on", "or", "that",
  "the", "their", "this", "to", "was", "with"
]);

// Attribute buckets shared by more entities than this are skipped when pairing entities.
var MAX_ATTRIBUTE_BUCKET_SIZE = 12;

// Maximum number of values quoted in a single evidence detail string.
var MAX_EVIDENCE_VALUES = 6;

// Minimum score lead the best canonical candidate needs over the runner-up.
var CANONICAL_SELECTION_MARGIN = 3;

// Largest active entry count a family may have to be treated as a singleton alias.
var SINGLETON_ALIAS_MAX_FAMILY_SIZE = 2;

// Minimum trusted entry count for a family to be treated as dominant.
var SINGLETON_ALIAS_MIN_DOMINANT_TRUSTED_COUNT = 3;

// NOTE(review): not referenced in the visible code; presumably a confidence gap
// used when choosing between singleton alias candidates; confirm.
var SINGLETON_ALIAS_MIN_CONFIDENCE_DELTA = 0.05;

// NOTE(review): not referenced in the visible code; these look like scope-like
// entity tokens consulted by the singleton alias evaluation; confirm.
var SINGLETON_ALIAS_SCOPE_TOKENS = /* @__PURE__ */ new Set([
  "agent", "app", "branch", "build", "cluster", "daemon",
  "device", "env", "environment", "gateway", "host", "machine",
  "node", "plugin", "project", "repo", "repository", "server",
  "service", "session", "system", "workspace"
]);
|
|
235
|
+
/**
 * Groups entity prefixes into candidate "families" of aliases.
 *
 * Profiles are built from the entries, pairwise support is computed between
 * profiles, and the support pairs are treated as edges of a graph. Each
 * connected component with at least two members becomes one family.
 *
 * @param {Iterable<object>} entries - Entries handed to buildTrustedClaimKeyEntityProfiles.
 * @returns {object[]} Families sorted by descending confidence, then by their
 *   "::"-joined sorted entity prefixes.
 */
function detectClaimKeyEntityFamilyCandidates(entries) {
  const profiles = buildTrustedClaimKeyEntityProfiles(entries);
  if (profiles.size < 2) {
    return [];
  }
  const pairSupport = buildPairSupport(profiles);
  if (pairSupport.length === 0) {
    return [];
  }

  // Orders any two records by their "::"-joined entity prefixes.
  const compareByPrefixKey = (left, right) => {
    const leftKey = left.entityPrefixes.join("::");
    const rightKey = right.entityPrefixes.join("::");
    return leftKey.localeCompare(rightKey);
  };

  // Undirected adjacency: every supported pair links both of its entities.
  const neighbours = /* @__PURE__ */ new Map();
  for (const { entityPrefixes: [firstEntity, secondEntity] } of pairSupport) {
    getOrCreateSet(neighbours, firstEntity).add(secondEntity);
    getOrCreateSet(neighbours, secondEntity).add(firstEntity);
  }

  const seen = /* @__PURE__ */ new Set();
  const families = [];
  for (const seed of neighbours.keys()) {
    if (seen.has(seed)) {
      continue;
    }
    const members = collectConnectedEntityComponent(seed, neighbours, seen);
    if (members.length < 2) {
      continue;
    }

    const memberSet = new Set(members);
    const memberSupport = pairSupport.filter(
      ({ entityPrefixes: [firstEntity, secondEntity] }) => memberSet.has(firstEntity) && memberSet.has(secondEntity)
    );

    const selection = selectCanonicalEntityPrefix(members, memberSupport, profiles);
    const canonicalEntityPrefix = selection.canonicalEntityPrefix;

    // Auto-convergence needs a canonical prefix whose pair with every other
    // member exists and is flagged auto-safe.
    let autoConverge = false;
    if (canonicalEntityPrefix !== null) {
      autoConverge = members
        .filter((entity) => entity !== canonicalEntityPrefix)
        .every((entity) => findPairSupport(memberSupport, canonicalEntityPrefix, entity)?.autoSafe === true);
    }

    const memberProfiles = members.map((entity) => profiles.get(entity)).filter((profile) => Boolean(profile));
    const durableIds = normalizeStringArray(memberProfiles.flatMap((profile) => [...profile.durableIds]));
    const claimKeys = normalizeStringArray(memberProfiles.flatMap((profile) => [...profile.claimKeys]));

    // Family confidence is the strongest pairwise confidence inside the component.
    const confidence = memberSupport.length > 0 ? Math.max(...memberSupport.map((support) => support.confidence)) : 0.75;

    const fallbackReason = autoConverge ? null : "Entity-family evidence is strong enough to stage, but not every alias mapping is low-risk.";
    const sortedPrefixes = [...members].sort((left, right) => left.localeCompare(right));
    memberSupport.sort(compareByPrefixKey);

    families.push({
      entityPrefixes: sortedPrefixes,
      durableIds,
      claimKeys,
      canonicalEntityPrefix,
      canonicalSelectionReasons: selection.reasons,
      confidence,
      autoConverge,
      unresolvedReason: selection.unresolvedReason ?? fallbackReason,
      pairSupport: memberSupport
    });
  }

  return families.sort((left, right) => {
    if (right.confidence !== left.confidence) {
      return right.confidence - left.confidence;
    }
    return compareByPrefixKey(left, right);
  });
}
|
|
300
|
+
/**
 * Tallies observations per normalized entity prefix.
 *
 * Observations without a non-blank `claim_key`, or whose key has no
 * normalized form according to inspectClaimKey, are ignored. Every counted
 * observation is first passed to assertKeyedDurableHasLifecycle.
 *
 * @param {Iterable<object>} observations - Records with `claim_key`,
 *   `claim_key_status` and `claim_key_source`.
 * @returns {object[]} One stats record per entity prefix, sorted by active
 *   count (descending), trusted count (descending), then prefix.
 */
function summarizeClaimKeyEntityPrefixStats(observations) {
  // Map lookups keep the strict string matching of a `switch` statement.
  const counterByStatus = new Map([
    ["trusted", "trustedEntryCount"],
    ["tentative", "tentativeEntryCount"],
    ["unresolved", "unresolvedEntryCount"]
  ]);
  const counterBySource = new Map([
    ["deterministic_repair", "deterministicRepairEntryCount"],
    ["manual", "manualEntryCount"],
    ["model", "modelEntryCount"],
    ["json_retry", "jsonRetryEntryCount"],
    ["dreaming_reconcile", "dreamingFamilyReuseDurableCount"]
  ]);

  const statsByPrefix = /* @__PURE__ */ new Map();
  for (const observation of observations) {
    const rawClaimKey = observation.claim_key?.trim();
    if (!rawClaimKey) {
      continue;
    }
    const { normalized } = inspectClaimKey(rawClaimKey);
    if (!normalized) {
      continue;
    }
    assertKeyedDurableHasLifecycle(observation);

    const entityPrefix = normalized.entity;
    let stats = statsByPrefix.get(entityPrefix);
    if (stats === undefined || stats === null) {
      stats = {
        entityPrefix,
        activeEntryCount: 0,
        trustedEntryCount: 0,
        tentativeEntryCount: 0,
        unresolvedEntryCount: 0,
        deterministicRepairEntryCount: 0,
        manualEntryCount: 0,
        modelEntryCount: 0,
        jsonRetryEntryCount: 0,
        dreamingFamilyReuseDurableCount: 0
      };
      statsByPrefix.set(entityPrefix, stats);
    }

    stats.activeEntryCount += 1;
    const statusCounter = counterByStatus.get(observation.claim_key_status);
    if (statusCounter !== undefined) {
      stats[statusCounter] += 1;
    }
    const sourceCounter = counterBySource.get(observation.claim_key_source);
    if (sourceCounter !== undefined) {
      stats[sourceCounter] += 1;
    }
  }

  const ordered = [...statsByPrefix.values()];
  ordered.sort(
    (left, right) => right.activeEntryCount - left.activeEntryCount
      || right.trustedEntryCount - left.trustedEntryCount
      || left.entityPrefix.localeCompare(right.entityPrefix)
  );
  return ordered;
}
|
|
368
|
+
/**
 * Convenience wrapper: summarizes the observations per entity prefix and runs
 * singleton-alias detection on the resulting stats.
 *
 * @param {Iterable<object>} observations - Records accepted by summarizeClaimKeyEntityPrefixStats.
 * @returns {object[]} Result of detectClaimKeySingletonAliasCandidatesFromStats.
 */
function detectClaimKeySingletonAliasCandidates(observations) {
  const prefixStats = summarizeClaimKeyEntityPrefixStats(observations);
  return detectClaimKeySingletonAliasCandidatesFromStats(prefixStats);
}
|
|
371
|
+
/**
 * Pairs small, not-fully-trusted entity families ("aliases") with larger,
 * well-trusted ones ("dominant") and keeps the candidates chosen per alias.
 *
 * @param {object[]} stats - Per-prefix stats records (entityPrefix, activeEntryCount, trustedEntryCount, ...).
 * @returns {object[]} Candidates sorted by descending confidence, then alias prefix.
 */
function detectClaimKeySingletonAliasCandidatesFromStats(stats) {
  const isDominant = (profile) => profile.trustedEntryCount >= SINGLETON_ALIAS_MIN_DOMINANT_TRUSTED_COUNT;
  const isAlias = (profile) => {
    const active = profile.activeEntryCount;
    return active > 0
      && active <= SINGLETON_ALIAS_MAX_FAMILY_SIZE
      && profile.trustedEntryCount < active
      && buildLowTrustEntryCount(profile) >= 1;
  };

  const dominantFamilies = stats.filter(isDominant);
  const aliasFamilies = stats.filter(isAlias);

  const candidatesByAlias = /* @__PURE__ */ new Map();
  for (const aliasProfile of aliasFamilies) {
    const aliasPrefix = aliasProfile.entityPrefix;
    for (const dominantProfile of dominantFamilies) {
      // A dominant family must be a different prefix and strictly larger than the alias.
      const samePrefix = aliasPrefix === dominantProfile.entityPrefix;
      if (samePrefix || dominantProfile.activeEntryCount <= aliasProfile.activeEntryCount) {
        continue;
      }
      const candidate = evaluateSingletonAliasCandidate(aliasProfile, dominantProfile);
      if (!candidate) {
        continue;
      }
      const bucket = candidatesByAlias.get(aliasPrefix);
      if (bucket) {
        bucket.push(candidate);
      } else {
        candidatesByAlias.set(aliasPrefix, [candidate]);
      }
    }
  }

  const perAliasCandidates = [...candidatesByAlias.values()];
  const selected = perAliasCandidates.flatMap(selectBestSingletonAliasCandidate);
  return selected.sort(
    (left, right) => right.confidence - left.confidence || left.aliasEntityPrefix.localeCompare(right.aliasEntityPrefix)
  );
}
|
|
393
|
+
/**
 * Aggregates entries into one profile per normalized entity prefix.
 *
 * Only entries whose claim key inspects as canonical, has a normalized form,
 * and carries no suspect reasons contribute to a profile.
 *
 * @param {Iterable<object>} entries - Records with `id`, `claim_key`, `tags`,
 *   `source_context`, `subject` and `quality_score`.
 * @returns {Map<string, object>} Profiles keyed by entity prefix.
 */
function buildTrustedClaimKeyEntityProfiles(entries) {
  const profiles = /* @__PURE__ */ new Map();
  const addAll = (target, values) => {
    for (const value of values) {
      target.add(value);
    }
  };

  for (const entry of entries) {
    const rawClaimKey = entry.claim_key?.trim();
    if (!rawClaimKey) {
      continue;
    }

    const inspection = inspectClaimKey(rawClaimKey);
    const usable = inspection.canonical && inspection.normalized && !(inspection.suspectReasons.length > 0);
    if (!usable) {
      continue;
    }

    const { entity: entityPrefix, attribute, claimKey } = inspection.normalized;
    const profile = getOrCreateProfile(profiles, entityPrefix);
    profile.durableIds.add(entry.id);
    profile.claimKeys.add(claimKey);
    profile.attributeSet.add(attribute);

    // The attribute "head" is the text before the first underscore; an empty head is not recorded.
    const firstSegment = attribute.split("_")[0];
    const attributeHead = firstSegment === undefined ? attribute : firstSegment;
    if (attributeHead) {
      profile.attributeHeadSet.add(attributeHead);
    }

    for (const tag of entry.tags) {
      const normalizedTag = normalizeClaimKeySegment(tag);
      if (normalizedTag) {
        profile.tags.add(normalizedTag);
      }
    }
    addAll(profile.sourceContextTokens, tokenizeGrounding(entry.source_context));
    addAll(profile.subjectTokens, tokenizeGrounding(entry.subject));

    profile.entryCount += 1;
    profile.totalQualityScore += entry.quality_score;
  }
  return profiles;
}
|
|
431
|
+
/**
 * Returns the profile stored for `entityPrefix`, creating and registering an
 * empty one when none exists yet.
 *
 * @param {Map<string, object>} profiles - Profile registry; mutated on a miss.
 * @param {string} entityPrefix - Underscore-separated entity prefix.
 * @returns {object} The existing or newly created profile.
 */
function getOrCreateProfile(profiles, entityPrefix) {
  const cached = profiles.get(entityPrefix);
  if (cached) {
    return cached;
  }

  // Empty segments (leading, trailing or doubled underscores) are dropped.
  const tokenList = entityPrefix.split("_").filter((token) => token.length > 0);
  const alphabeticalTokens = tokenList.slice().sort();

  const profile = {
    entityPrefix,
    durableIds: /* @__PURE__ */ new Set(),
    claimKeys: /* @__PURE__ */ new Set(),
    attributeSet: /* @__PURE__ */ new Set(),
    attributeHeadSet: /* @__PURE__ */ new Set(),
    tags: /* @__PURE__ */ new Set(),
    sourceContextTokens: /* @__PURE__ */ new Set(),
    subjectTokens: /* @__PURE__ */ new Set(),
    entryCount: 0,
    totalQualityScore: 0,
    tokenList,
    // Order-insensitive signature: tokens sorted, joined with "_".
    sortedTokenSignature: alphabeticalTokens.join("_"),
    // Separator-insensitive signature: tokens concatenated in original order.
    compactSignature: tokenList.join("")
  };
  profiles.set(entityPrefix, profile);
  return profile;
}
|
|
455
|
+
/**
 * Computes pairwise support between entity profiles that share at least one attribute.
 *
 * Entities are bucketed by attribute. Buckets with fewer than two entities, or
 * more than MAX_ATTRIBUTE_BUCKET_SIZE, are ignored. Every remaining unordered
 * pair is evaluated once.
 *
 * @param {Map<string, object>} profiles - Profiles keyed by entity prefix.
 * @returns {object[]} The non-null results of evaluateEntityFamilyPairSupport.
 */
function buildPairSupport(profiles) {
  const entitiesByAttribute = /* @__PURE__ */ new Map();
  for (const profile of profiles.values()) {
    for (const attribute of profile.attributeSet) {
      const members = entitiesByAttribute.get(attribute);
      if (!members) {
        entitiesByAttribute.set(attribute, [profile.entityPrefix]);
      } else {
        members.push(profile.entityPrefix);
      }
    }
  }

  // A Set de-duplicates pairs that co-occur in more than one attribute bucket.
  const pairKeys = /* @__PURE__ */ new Set();
  for (const members of entitiesByAttribute.values()) {
    const candidates = normalizeStringArray(members);
    const bucketSize = candidates.length;
    if (bucketSize < 2 || bucketSize > MAX_ATTRIBUTE_BUCKET_SIZE) {
      continue;
    }
    candidates.forEach((leftEntity, position) => {
      if (!leftEntity) {
        return;
      }
      for (const rightEntity of candidates.slice(position + 1)) {
        if (!rightEntity) {
          continue;
        }
        pairKeys.add(buildPairKey(leftEntity, rightEntity));
      }
    });
  }

  const support = [];
  for (const pairKey of pairKeys) {
    // Pair keys are split on "::" to recover the two entity prefixes.
    const [leftEntity = "", rightEntity = ""] = pairKey.split("::");
    const leftProfile = profiles.get(leftEntity);
    const rightProfile = profiles.get(rightEntity);
    if (leftProfile && rightProfile) {
      const evaluated = evaluateEntityFamilyPairSupport(leftProfile, rightProfile);
      if (evaluated) {
        support.push(evaluated);
      }
    }
  }
  return support;
}
|
|
502
|
+
/**
 * Decides whether two entity profiles plausibly describe the same entity and,
 * if so, returns the supporting evidence.
 *
 * A pair qualifies with three or more shared attributes; with two plus either
 * a lexical relation or a grounding anchor; or with exactly one plus both.
 * Shared tags, or three or more shared source-context tokens, count as anchors.
 *
 * @param {object} leftProfile - Profile of the first entity.
 * @param {object} rightProfile - Profile of the second entity.
 * @returns {object | null} Support record, or `null` when the pair does not qualify.
 */
function evaluateEntityFamilyPairSupport(leftProfile, rightProfile) {
  const sharedAttributes = intersectSets(leftProfile.attributeSet, rightProfile.attributeSet);
  if (sharedAttributes.length === 0) {
    return null;
  }
  const sharedAttributeHeads = intersectSets(leftProfile.attributeHeadSet, rightProfile.attributeHeadSet);
  const sharedTags = intersectSets(leftProfile.tags, rightProfile.tags);
  const sharedSourceContextTokens = intersectSets(leftProfile.sourceContextTokens, rightProfile.sourceContextTokens);
  const sharedSubjectTokens = intersectSets(leftProfile.subjectTokens, rightProfile.subjectTokens);
  const lexicalRelation = evaluateEntityLexicalRelation(leftProfile, rightProfile);

  const hasTagAnchor = sharedTags.length > 0;
  const hasSourceContextAnchor = sharedSourceContextTokens.length >= 3;
  const hasSubjectGrounding = sharedSubjectTokens.length >= 2;
  // Anchors are tags and source context only; the grounding score also counts subject tokens.
  const groundingAnchorCount = (hasTagAnchor ? 1 : 0) + (hasSourceContextAnchor ? 1 : 0);
  const groundingScore = groundingAnchorCount + (hasSubjectGrounding ? 1 : 0);

  const attributeCount = sharedAttributes.length;
  const hasLexicalRelation = lexicalRelation.kind !== null;
  const hasAnchor = groundingAnchorCount >= 1;
  let qualifies;
  if (attributeCount >= 3) {
    qualifies = true;
  } else if (attributeCount >= 2) {
    qualifies = hasLexicalRelation || hasAnchor;
  } else {
    qualifies = attributeCount === 1 && hasLexicalRelation && hasAnchor;
  }
  if (!qualifies) {
    return null;
  }

  const preview = (values) => values.slice(0, MAX_EVIDENCE_VALUES).join(", ");
  const evidence = [];
  evidence.push({
    kind: "shared_attribute_overlap",
    detail: `Shared attributes: ${preview(sharedAttributes)}`
  });
  if (sharedAttributeHeads.length >= 2) {
    evidence.push({
      kind: "shared_attribute_head_overlap",
      detail: `Shared attribute families: ${preview(sharedAttributeHeads)}`
    });
  }
  if (hasTagAnchor) {
    evidence.push({
      kind: "shared_tag_grounding",
      detail: `Shared tags: ${preview(sharedTags)}`
    });
  }
  // Source-context evidence is reported from two shared tokens, although the
  // anchor above requires three.
  if (sharedSourceContextTokens.length >= 2) {
    evidence.push({
      kind: "shared_source_context_grounding",
      detail: `Shared source-context tokens: ${preview(sharedSourceContextTokens)}`
    });
  }
  if (hasSubjectGrounding) {
    evidence.push({
      kind: "shared_subject_grounding",
      detail: `Shared subject tokens: ${preview(sharedSubjectTokens)}`
    });
  }
  if (lexicalRelation.kind && lexicalRelation.detail) {
    evidence.push({
      kind: lexicalRelation.kind,
      detail: lexicalRelation.detail
    });
  }

  // Base 0.48 plus capped contributions, limited to 0.98 overall.
  const rawConfidence = 0.48 + Math.min(attributeCount, 3) * 0.12 + Math.min(groundingScore, 3) * 0.08 + lexicalRelation.strengthScore * 0.05;
  const confidence = Math.min(0.98, rawConfidence);

  const attributeBackedAutoSafe = attributeCount >= 2 || attributeCount === 1 && hasAnchor && groundingScore >= 2;

  return {
    entityPrefixes: [leftProfile.entityPrefix, rightProfile.entityPrefix],
    supportingDurableIds: normalizeStringArray([...leftProfile.durableIds, ...rightProfile.durableIds]),
    sharedAttributes,
    confidence,
    autoSafe: lexicalRelation.autoSafe && attributeBackedAutoSafe,
    preferredCanonicalEntityPrefix: lexicalRelation.preferredCanonicalEntityPrefix,
    evidence
  };
}
|
|
568
|
+
/**
 * Classifies the lexical relationship between two entity prefixes.
 *
 * Checks run in priority order and the first match wins:
 *   1. separator variant: same tokens once separators are removed (strength 3, auto-safe)
 *   2. token reordering: same tokens in a different order (strength 2, auto-safe)
 *   3. initialism expansion: one prefix equals the other's initialism (strength 1)
 *   4. token subset: one token list is a subset of the other (strength 1)
 * If nothing matches, a record with `kind: null` and strength 0 is returned.
 *
 * @param {object} leftProfile - Profile with entityPrefix, tokenList, sortedTokenSignature, compactSignature.
 * @param {object} rightProfile - Same shape as `leftProfile`.
 * @returns {{kind: string | null, detail: string | null, autoSafe: boolean,
 *   preferredCanonicalEntityPrefix: string | null, strengthScore: number}}
 */
function evaluateEntityLexicalRelation(leftProfile, rightProfile) {
  const relation = (kind, detail, autoSafe, preferredCanonicalEntityPrefix, strengthScore) => ({
    kind,
    detail,
    autoSafe,
    preferredCanonicalEntityPrefix,
    strengthScore
  });

  const leftPrefix = leftProfile.entityPrefix;
  const rightPrefix = rightProfile.entityPrefix;
  const leftTokens = leftProfile.tokenList;
  const rightTokens = rightProfile.tokenList;
  const prefixesDiffer = leftPrefix !== rightPrefix;

  if (leftProfile.compactSignature === rightProfile.compactSignature && prefixesDiffer) {
    // Prefer the spelling with more tokens; with equal counts there is no preference.
    let preferred = null;
    if (leftTokens.length !== rightTokens.length) {
      preferred = leftTokens.length > rightTokens.length ? leftPrefix : rightPrefix;
    }
    const detail = preferred === null
      ? "Entity prefixes collapse to the same compact lexical form."
      : `Entity prefixes collapse to the same compact lexical form; "${preferred}" preserves clearer token boundaries.`;
    return relation("lexical_separator_variant", detail, true, preferred, 3);
  }

  const leftSignature = leftProfile.sortedTokenSignature;
  if (leftSignature.length > 0 && leftSignature === rightProfile.sortedTokenSignature && prefixesDiffer) {
    return relation(
      "lexical_token_reordering",
      "Entity prefixes use the same lexical tokens in a different order.",
      true,
      null,
      2
    );
  }

  const leftInitialism = buildInitialism(leftTokens);
  const rightInitialism = buildInitialism(rightTokens);
  // The expanded form is preferred over its initialism.
  if (leftInitialism.length >= 2 && leftInitialism === rightPrefix) {
    return relation(
      "lexical_initialism_expansion",
      `Entity prefix "${rightPrefix}" matches the initialism of "${leftPrefix}".`,
      false,
      leftPrefix,
      1
    );
  }
  if (rightInitialism.length >= 2 && rightInitialism === leftPrefix) {
    return relation(
      "lexical_initialism_expansion",
      `Entity prefix "${leftPrefix}" matches the initialism of "${rightPrefix}".`,
      false,
      rightPrefix,
      1
    );
  }

  // The superset form is preferred over its subset.
  if (isTokenSubset(leftTokens, rightTokens)) {
    return relation(
      "lexical_token_subset",
      `"${leftPrefix}" is a lexical subset of "${rightPrefix}".`,
      false,
      rightPrefix,
      1
    );
  }
  if (isTokenSubset(rightTokens, leftTokens)) {
    return relation(
      "lexical_token_subset",
      `"${rightPrefix}" is a lexical subset of "${leftPrefix}".`,
      false,
      leftPrefix,
      1
    );
  }

  return relation(null, null, false, null, 0);
}
|
|
636
|
+
/**
 * Conservatively picks the canonical entity prefix for a detected family.
 *
 * Each prefix with a profile gets a score from attribute coverage, entry
 * count, rounded mean quality score, lexical preference votes (+4 for, -2
 * against) and form specificity. The top-scoring prefix is accepted only when
 * it has pair support with lexical evidence to every peer, at least one
 * lexical vote, and a lead of CANONICAL_SELECTION_MARGIN over the runner-up.
 *
 * @param {string[]} entityPrefixes - Members of the family.
 * @param {object[]} pairSupport - Support records restricted to the family.
 * @param {Map<string, object>} profiles - Profiles keyed by entity prefix.
 * @returns {{canonicalEntityPrefix: string | null, reasons: string[], unresolvedReason: string | null}}
 */
function selectCanonicalEntityPrefix(entityPrefixes, pairSupport, profiles) {
  const unresolved = (unresolvedReason) => ({
    canonicalEntityPrefix: null,
    reasons: [],
    unresolvedReason
  });
  const noCandidateReason = "No canonical entity prefix could be selected from the detected family.";

  // entity prefix -> { score, reasons, votes }
  const scoreboard = /* @__PURE__ */ new Map();
  for (const entityPrefix of entityPrefixes) {
    const profile = profiles.get(entityPrefix);
    if (!profile) {
      continue;
    }

    const meanQuality = Math.round(profile.totalQualityScore / Math.max(profile.entryCount, 1));
    let score = Math.min(profile.attributeSet.size, 6) * 2 + Math.min(profile.entryCount, 6) + meanQuality;

    // A pair preferring this prefix adds to its score; a pair preferring any other prefix subtracts.
    let votes = 0;
    for (const { preferredCanonicalEntityPrefix: preferred } of pairSupport) {
      if (preferred === entityPrefix) {
        score += 4;
        votes += 1;
      } else if (preferred !== null) {
        score -= 2;
      }
    }

    const specificity = scoreEntityFormSpecificity(entityPrefix, entityPrefixes, profiles);
    score += specificity.score;

    const reasons = [];
    if (profile.attributeSet.size >= 2) {
      reasons.push("broader trusted attribute coverage");
    }
    if (specificity.reason) {
      reasons.push(specificity.reason);
    }
    if (votes > 0) {
      reasons.push(`lexical alias evidence prefers "${entityPrefix}"`);
    }

    scoreboard.set(entityPrefix, { score, reasons: normalizeStringArray(reasons), votes });
  }

  const ranking = [...scoreboard.entries()].sort(
    (left, right) => right[1].score - left[1].score || left[0].localeCompare(right[0])
  );
  const [winner, runnerUp] = ranking;
  if (!winner) {
    return unresolved(noCandidateReason);
  }

  const [bestEntityPrefix, bestRecord] = winner;
  const runnerUpScore = runnerUp ? runnerUp[1].score : Number.NEGATIVE_INFINITY;
  if (!profiles.get(bestEntityPrefix)) {
    return unresolved(noCandidateReason);
  }

  const peerSupport = entityPrefixes
    .filter((entityPrefix) => entityPrefix !== bestEntityPrefix)
    .map((entityPrefix) => findPairSupport(pairSupport, bestEntityPrefix, entityPrefix));
  const supportsEveryPeer = peerSupport.every((support) => support !== null);
  const lexicallySupportsEveryPeer = peerSupport.every(
    (support) => support?.evidence.some((item) => item.kind.startsWith("lexical_")) === true
  );
  const leadIsDecisive = !(bestRecord.score - runnerUpScore < CANONICAL_SELECTION_MARGIN);

  const accepted = supportsEveryPeer && lexicallySupportsEveryPeer && bestRecord.votes !== 0 && leadIsDecisive;
  if (!accepted) {
    return unresolved("Multiple plausible canonical entity prefixes remain after conservative scoring.");
  }

  return {
    canonicalEntityPrefix: bestEntityPrefix,
    reasons: bestRecord.reasons ?? [],
    unresolvedReason: null
  };
}
|
|
704
|
+
/**
 * Scores how specific an entity prefix's lexical form is compared with its peers.
 *
 * A prefix gains 2 points when it has at least two tokens and some peer with the
 * same compact signature has fewer tokens. It loses 1 point when it is at most
 * three characters long and no peer shares its compact signature.
 *
 * @param {string} entityPrefix - Prefix being scored.
 * @param {string[]} entityPrefixes - All prefixes under consideration (may include `entityPrefix`).
 * @param {Map<string, {compactSignature: string, tokenList: string[]}>} profiles - Profile lookup by prefix.
 * @returns {{score: number, reason: (string|null)}} Score delta and optional explanation.
 */
function scoreEntityFormSpecificity(entityPrefix, entityPrefixes, profiles) {
  const ownProfile = profiles.get(entityPrefix);
  if (!ownProfile) {
    return { score: 0, reason: null };
  }

  // Collect the profiles of other prefixes that collapse to the same compact signature.
  const sameSignaturePeers = [];
  for (const candidatePrefix of entityPrefixes) {
    if (candidatePrefix === entityPrefix) {
      continue;
    }
    const candidateProfile = profiles.get(candidatePrefix);
    if (candidateProfile && candidateProfile.compactSignature === ownProfile.compactSignature) {
      sameSignaturePeers.push(candidateProfile);
    }
  }

  const ownTokenCount = ownProfile.tokenList.length;
  const isLessAbbreviated =
    ownTokenCount >= 2 && sameSignaturePeers.some((peer) => peer.tokenList.length < ownTokenCount);
  const isShortLoner = entityPrefix.length <= 3 && sameSignaturePeers.length === 0;

  return {
    score: (isLessAbbreviated ? 2 : 0) - (isShortLoner ? 1 : 0),
    reason: isLessAbbreviated ? "less abbreviated lexical form" : null
  };
}
|
|
721
|
+
/**
 * Breadth-first walk that gathers every entity prefix reachable from a start node.
 *
 * @param {string} startingEntityPrefix - Node the walk starts from.
 * @param {Map<string, Iterable<string>>} adjacency - Neighbours for each prefix.
 * @param {Set<string>} visited - Shared visited set; mutated so callers can skip
 *   nodes already assigned to a component.
 * @returns {string[]} Reached prefixes in discovery order (falsy entries are skipped).
 */
function collectConnectedEntityComponent(startingEntityPrefix, adjacency, visited) {
  const pending = [startingEntityPrefix];
  const members = [];
  visited.add(startingEntityPrefix);

  // `pending` grows while we iterate; the cursor plays the role of a queue head.
  for (let cursor = 0; cursor < pending.length; cursor += 1) {
    const current = pending[cursor];
    if (!current) {
      continue;
    }
    members.push(current);

    const neighbours = adjacency.get(current);
    if (!neighbours) {
      continue;
    }
    for (const neighbour of neighbours) {
      if (!visited.has(neighbour)) {
        visited.add(neighbour);
        pending.push(neighbour);
      }
    }
  }
  return members;
}
|
|
745
|
+
/**
 * Looks up the support record that links two entity prefixes, in either order.
 *
 * @param {Array<{entityPrefixes: [string, string]}>} pairSupport - Pair support records.
 * @param {string} leftEntityPrefix - One side of the pair.
 * @param {string} rightEntityPrefix - The other side of the pair.
 * @returns {object|null} First matching record, or `null` when there is none.
 */
function findPairSupport(pairSupport, leftEntityPrefix, rightEntityPrefix) {
  const match = pairSupport.find(({ entityPrefixes: [first, second] }) => {
    const sameOrder = first === leftEntityPrefix && second === rightEntityPrefix;
    const swappedOrder = first === rightEntityPrefix && second === leftEntityPrefix;
    return sameOrder || swappedOrder;
  });
  return match ?? null;
}
|
|
754
|
+
/**
 * Evaluates whether a small alias family looks like a variant of a dominant family.
 *
 * Returns `null` when the lexical relation is missing or scope-like, when the
 * dominant family has fewer trusted entries than
 * SINGLETON_ALIAS_MIN_DOMINANT_TRUSTED_COUNT, or when the alias family has no
 * low-trust entries.
 *
 * @param {object} aliasProfile - Profile of the candidate alias family.
 * @param {object} dominantProfile - Profile of the candidate dominant family.
 * @returns {object|null} Candidate description with evidence and a confidence capped at 0.98.
 */
function evaluateSingletonAliasCandidate(aliasProfile, dominantProfile) {
  const relation = evaluateSingletonAliasLexicalRelation(aliasProfile.entityPrefix, dominantProfile.entityPrefix);
  const relationUsable = relation.kind && relation.detail && !relation.scopeLike;
  if (!relationUsable) {
    return null;
  }

  const dominantTrustedCount = dominantProfile.trustedEntryCount;
  if (dominantTrustedCount < SINGLETON_ALIAS_MIN_DOMINANT_TRUSTED_COUNT) {
    return null;
  }

  const aliasLowTrustCount = buildLowTrustEntryCount(aliasProfile);
  if (aliasLowTrustCount === 0) {
    return null;
  }

  const aliasFamilySize = aliasProfile.activeEntryCount;
  const dominantFamilySize = dominantProfile.activeEntryCount;

  const evidence = [];
  evidence.push({
    kind: "singleton_family_size",
    detail: `"${aliasProfile.entityPrefix}" has ${aliasFamilySize} active keyed ${pluralize(aliasFamilySize, "entry")}.`
  });
  evidence.push({
    kind: "dominant_trusted_family",
    detail: `"${dominantProfile.entityPrefix}" already has ${dominantTrustedCount} trusted ${pluralize(dominantTrustedCount, "entry")}.`
  });
  evidence.push({
    kind: "low_trust_creation_path",
    detail: describeLowTrustAliasFamily(aliasProfile)
  });
  evidence.push({
    kind: relation.kind,
    detail: relation.detail
  });

  // Keep the terms in this exact order: floating-point addition is not associative.
  const uncappedConfidence =
    0.58 +
    Math.min(dominantTrustedCount, 6) * 0.05 +
    Math.min(aliasLowTrustCount, 2) * 0.05 +
    Math.min(dominantFamilySize - aliasFamilySize, 6) * 0.02 +
    relation.strengthScore * 0.08;
  const confidence = Math.min(0.98, uncappedConfidence);

  // Reuse is only flagged safe for a single-entry alias whose one entry is low-trust.
  const canonicalReuseSafe =
    relation.canonicalReuseSafe &&
    aliasFamilySize === 1 &&
    aliasLowTrustCount === aliasFamilySize &&
    dominantTrustedCount >= SINGLETON_ALIAS_MIN_DOMINANT_TRUSTED_COUNT;

  return {
    aliasEntityPrefix: aliasProfile.entityPrefix,
    dominantEntityPrefix: dominantProfile.entityPrefix,
    aliasFamilySize,
    dominantFamilySize,
    dominantTrustedCount,
    aliasLowTrustCount,
    confidence,
    canonicalReuseSafe,
    evidence
  };
}
|
|
801
|
+
/**
 * Picks the single most confident alias candidate, or none when the top two are
 * too close to call.
 *
 * @param {Array<{confidence: number, dominantEntityPrefix: string}>} candidates - Candidates to rank (not mutated).
 * @returns {Array<object>} A one-element array with the winner, or an empty array
 *   when there are no candidates or the lead over the runner-up is below
 *   SINGLETON_ALIAS_MIN_CONFIDENCE_DELTA.
 */
function selectBestSingletonAliasCandidate(candidates) {
  const byConfidenceThenPrefix = (a, b) => {
    const confidenceGap = b.confidence - a.confidence;
    if (confidenceGap) {
      return confidenceGap;
    }
    return a.dominantEntityPrefix.localeCompare(b.dominantEntityPrefix);
  };
  const ranked = Array.from(candidates).sort(byConfidenceThenPrefix);

  const leader = ranked[0];
  if (!leader) {
    return [];
  }
  const challenger = ranked[1];
  const tooClose = challenger
    ? leader.confidence - challenger.confidence < SINGLETON_ALIAS_MIN_CONFIDENCE_DELTA
    : false;
  return tooClose ? [] : [leader];
}
|
|
814
|
+
/**
 * Classifies the lexical relation between an alias prefix and a dominant prefix.
 *
 * Outcomes, in the order they are checked:
 * - "lexical_separator_variant": different prefixes whose "_"-separated tokens join to the same string.
 * - no relation: the dominant tokens are not a strict token subset of the alias tokens.
 * - scope-like (rejected): the alias adds anything other than exactly one token,
 *   or an added token is in SINGLETON_ALIAS_SCOPE_TOKENS.
 * - "lexical_token_subset": the alias extends the dominant prefix by one token.
 *
 * @param {string} aliasEntityPrefix - Candidate alias prefix.
 * @param {string} dominantEntityPrefix - Candidate dominant prefix.
 * @returns {{kind: (string|null), detail: (string|null), canonicalReuseSafe: boolean, scopeLike: boolean, strengthScore: number}}
 */
function evaluateSingletonAliasLexicalRelation(aliasEntityPrefix, dominantEntityPrefix) {
  const splitTokens = (prefix) => prefix.split("_").filter((token) => token.length > 0);
  const rejected = (scopeLike) => ({
    kind: null,
    detail: null,
    canonicalReuseSafe: false,
    scopeLike,
    strengthScore: 0
  });

  const aliasTokens = splitTokens(aliasEntityPrefix);
  const dominantTokens = splitTokens(dominantEntityPrefix);

  const sameCompactForm = aliasTokens.join("") === dominantTokens.join("");
  if (sameCompactForm && aliasEntityPrefix !== dominantEntityPrefix) {
    return {
      kind: "lexical_separator_variant",
      detail: `Entity prefixes "${aliasEntityPrefix}" and "${dominantEntityPrefix}" collapse to the same compact lexical form.`,
      canonicalReuseSafe: true,
      scopeLike: false,
      strengthScore: 3
    };
  }

  if (!isTokenSubset(dominantTokens, aliasTokens)) {
    return rejected(false);
  }

  const dominantLookup = new Set(dominantTokens);
  const extraTokens = aliasTokens.filter((token) => !dominantLookup.has(token));
  const looksLikeScope =
    extraTokens.length !== 1 || extraTokens.some((token) => SINGLETON_ALIAS_SCOPE_TOKENS.has(token));
  if (looksLikeScope) {
    return rejected(true);
  }

  return {
    kind: "lexical_token_subset",
    detail: `"${aliasEntityPrefix}" extends "${dominantEntityPrefix}" by the added token "${extraTokens[0]}".`,
    canonicalReuseSafe: true,
    scopeLike: false,
    strengthScore: 2
  };
}
|
|
857
|
+
/**
 * Counts the low-trust entries in a family profile.
 *
 * The tentative count is subtracted from the deterministic-repair count
 * (floored at zero) before the three contributions are summed.
 *
 * @param {{deterministicRepairEntryCount: number, tentativeEntryCount: number, unresolvedEntryCount: number}} profile
 * @returns {number} tentative + unresolved + max(0, deterministicRepair - tentative).
 */
function buildLowTrustEntryCount(profile) {
  const { deterministicRepairEntryCount, tentativeEntryCount, unresolvedEntryCount } = profile;
  const repairsBeyondTentative = Math.max(0, deterministicRepairEntryCount - tentativeEntryCount);
  return tentativeEntryCount + unresolvedEntryCount + repairsBeyondTentative;
}
|
|
861
|
+
/**
 * Builds a one-sentence explanation of why an alias family counts as low-trust.
 *
 * @param {{entityPrefix: string, deterministicRepairEntryCount: number, tentativeEntryCount: number, unresolvedEntryCount: number}} profile
 * @returns {string} Sentence listing each non-zero low-trust count, or a generic
 *   "not fully trusted yet" sentence when all counts are zero.
 */
function describeLowTrustAliasFamily(profile) {
  const countsByLabel = [
    ["deterministic repair", profile.deterministicRepairEntryCount],
    ["tentative", profile.tentativeEntryCount],
    ["unresolved", profile.unresolvedEntryCount]
  ];

  const fragments = [];
  for (const [label, count] of countsByLabel) {
    if (count > 0) {
      fragments.push(`${count} ${label} ${pluralize(count, "entry")}`);
    }
  }

  return fragments.length === 0
    ? `"${profile.entityPrefix}" is not fully trusted yet.`
    : `"${profile.entityPrefix}" is low-trust because it has ${fragments.join(", ")}.`;
}
|
|
877
|
+
/**
 * Joins the first character of every token into an initialism.
 *
 * @param {string[]} tokens - Tokens to abbreviate.
 * @returns {string} The initialism, or "" when fewer than two tokens are given.
 */
function buildInitialism(tokens) {
  if (tokens.length < 2) {
    return "";
  }
  let initials = "";
  for (const token of tokens) {
    initials += token[0] ?? "";
  }
  return initials;
}
|
|
883
|
+
/**
 * Checks whether one token list is a strict subset of another.
 *
 * "Strict" is decided by list length: the candidate subset must be non-empty and
 * shorter than the superset list.
 *
 * @param {string[]} subsetTokens - Candidate subset.
 * @param {string[]} supersetTokens - Candidate superset.
 * @returns {boolean} True when every subset token also occurs in the superset.
 */
function isTokenSubset(subsetTokens, supersetTokens) {
  const subsetSize = subsetTokens.length;
  if (subsetSize === 0 || subsetSize >= supersetTokens.length) {
    return false;
  }
  const allowed = new Set(supersetTokens);
  for (const token of subsetTokens) {
    if (!allowed.has(token)) {
      return false;
    }
  }
  return true;
}
|
|
890
|
+
/**
 * Splits a value into grounding tokens.
 *
 * The value is passed through normalizeClaimKeySegment, split on "_", and
 * filtered to tokens of at least two characters that are not in
 * ENTITY_FAMILY_GROUNDING_STOP_TOKENS.
 *
 * @param {string|null|undefined} value - Raw text.
 * @returns {string[]} Grounding tokens; empty for falsy input or an empty normalization.
 */
function tokenizeGrounding(value) {
  const normalized = value ? normalizeClaimKeySegment(value) : "";
  if (!normalized) {
    return [];
  }
  const tokens = [];
  for (const token of normalized.split("_")) {
    const longEnough = token.length >= 2;
    if (longEnough && !ENTITY_FAMILY_GROUNDING_STOP_TOKENS.has(token)) {
      tokens.push(token);
    }
  }
  return tokens;
}
|
|
900
|
+
/**
 * Returns the values present in both sets, sorted with `localeCompare`.
 *
 * @param {Set<string>} left - First set.
 * @param {Set<string>} right - Second set.
 * @returns {string[]} Sorted intersection.
 */
function intersectSets(left, right) {
  // Iterate the smaller set and probe the larger one.
  const [probe, lookup] = right.size < left.size ? [right, left] : [left, right];
  const shared = [...probe].filter((value) => lookup.has(value));
  shared.sort((a, b) => a.localeCompare(b));
  return shared;
}
|
|
910
|
+
/**
 * Trims every string, drops empty results, and removes duplicates while keeping
 * first-occurrence order.
 *
 * @param {string[]} values - Raw strings.
 * @returns {string[]} Trimmed, non-empty, de-duplicated strings.
 */
function normalizeStringArray(values) {
  const unique = new Set();
  for (const raw of values) {
    const trimmed = raw.trim();
    if (trimmed.length > 0) {
      unique.add(trimmed);
    }
  }
  return [...unique];
}
|
|
913
|
+
/**
 * Builds an order-independent key for a pair of entity prefixes.
 *
 * @param {string} leftEntityPrefix - One prefix.
 * @param {string} rightEntityPrefix - The other prefix.
 * @returns {string} The two prefixes in `localeCompare` order, joined with "::".
 */
function buildPairKey(leftEntityPrefix, rightEntityPrefix) {
  const orderedPair = [leftEntityPrefix, rightEntityPrefix];
  orderedPair.sort((a, b) => a.localeCompare(b));
  return orderedPair.join("::");
}
|
|
916
|
+
/**
 * Returns the Set stored under `key`, creating and registering an empty one when
 * the map has no truthy value for that key.
 *
 * @param {Map<*, Set<*>>} map - Map of sets; mutated when a new Set is created.
 * @param {*} key - Lookup key.
 * @returns {Set<*>} The existing or newly created Set.
 */
function getOrCreateSet(map, key) {
  let bucket = map.get(key);
  if (!bucket) {
    bucket = /* @__PURE__ */ new Set();
    map.set(key, bucket);
  }
  return bucket;
}
|
|
925
|
+
/**
 * Returns the singular or plural form of a noun for a count.
 *
 * Nouns ending in a consonant followed by "y" pluralize to "-ies"
 * ("entry" -> "entries"); every other noun gets a plain "s". Previously
 * "entry" was rendered as "entrys" in user-facing evidence strings.
 *
 * @param {number} count - Quantity being described.
 * @param {string} noun - Singular noun.
 * @returns {string} `noun` when `count === 1`, otherwise its plural.
 */
function pluralize(count, noun) {
  if (count === 1) {
    return noun;
  }
  if (/[^aeiou]y$/iu.test(noun)) {
    return `${noun.slice(0, -1)}ies`;
  }
  return `${noun}s`;
}
|
|
928
|
+
|
|
929
|
+
// src/core/supersession.ts
|
|
930
|
+
/**
 * Checks whether one entry may supersede another.
 *
 * Rules are checked in order: the types must match, the old entry must not be a
 * "milestone", and the old entry must not have "core" expiry.
 *
 * @param {{type: string, expiry?: string}} oldEntry - Entry that would be superseded.
 * @param {{type: string}} newEntry - Entry that would replace it.
 * @returns {{ok: true}|{ok: false, reason: string}} Verdict with the first failing rule.
 */
function validateSupersessionRules(oldEntry, newEntry) {
  let reason = null;
  if (oldEntry.type !== newEntry.type) {
    reason = "type_mismatch";
  } else if (oldEntry.type === "milestone") {
    reason = "milestone";
  } else if (oldEntry.expiry === "core") {
    reason = "core_expiry";
  }
  return reason === null ? { ok: true } : { ok: false, reason };
}
|
|
953
|
+
/**
 * Maps a supersession failure reason to its human-readable message.
 *
 * @param {string} reason - "type_mismatch", "milestone", or "core_expiry".
 * @returns {string|undefined} The message, or `undefined` for any other reason.
 */
function describeSupersessionRuleFailure(reason) {
  if (reason === "type_mismatch") {
    return "Supersession requires both entries to have the same type.";
  }
  if (reason === "milestone") {
    return "Milestone entries are never superseded automatically.";
  }
  if (reason === "core_expiry") {
    return "Core-expiry entries are never superseded automatically.";
  }
  return undefined;
}
|
|
963
|
+
|
|
964
|
+
// src/core/store/hashing.ts
|
|
965
|
+
import { createHash } from "crypto";
|
|
966
|
+
/**
 * Computes the SHA-256 hex digest of an entry's content.
 *
 * When `sourceFile` is truthy it is prefixed to the content, separated by a
 * newline, so identical content from different files hashes differently.
 *
 * @param {string} content - Content to hash.
 * @param {string} [sourceFile] - Optional source file name mixed into the hash.
 * @returns {string} Lower-case hex SHA-256 digest.
 */
function computeContentHash(content, sourceFile) {
  const hasher = createHash("sha256");
  hasher.update(sourceFile ? `${sourceFile}\n${content}` : content);
  return hasher.digest("hex");
}
|
|
971
|
+
/**
 * Computes a SHA-256 hex digest of content after light normalization.
 *
 * Normalization steps, in order: lower-case, collapse whitespace runs to one
 * space, trim, then remove every character that is neither a word character
 * nor whitespace.
 *
 * NOTE(review): stripping punctuation last can leave double spaces
 * ("a - b" -> "a  b"); the order is kept as-is because changing it would change
 * the resulting hashes.
 *
 * @param {string} content - Content to hash.
 * @returns {string} Lower-case hex SHA-256 digest of the normalized content.
 */
function computeNormContentHash(content) {
  const lowered = content.toLowerCase();
  const singleSpaced = lowered.replace(/\s+/g, " ").trim();
  const withoutPunctuation = singleSpaced.replace(/[^\w\s]/g, "");
  const hasher = createHash("sha256");
  hasher.update(withoutPunctuation);
  return hasher.digest("hex");
}
|
|
975
|
+
|
|
976
|
+
// src/core/store/project-scope.ts
|
|
977
|
+
import path from "path";
|
|
978
|
+
// Normalized directory basenames that deriveWorkingDirectoryProject rejects as project names.
var IGNORED_PROJECT_DIRECTORY_NAMES = /* @__PURE__ */ new Set(["", ".", "..", "users", "user", "home", "tmp", "var"]);
|
|
979
|
+
/**
 * Resolves the project scope for a durable entry.
 *
 * Precedence:
 * 1. The entry's own non-blank `project`.
 * 2. `context.sessionWorkspace`, when the claim key or the entry text points at it.
 * 3. The project derived from `context.workingDirectory`, when the entry text points at it.
 *
 * @param {object} entry - Entry with optional `project`, `claim_key`, `subject`, `source_context`, `tags`.
 * @param {{sessionWorkspace?: (string|null), workingDirectory?: (string|null)}} [context] - Session hints.
 * @returns {string|undefined} The resolved project, or `undefined` when nothing matches.
 */
function resolveDurableProjectScope(entry, context = {}) {
  const explicitProject = normalizeOptionalString(entry.project);
  if (explicitProject) {
    return explicitProject;
  }

  const workspace = normalizeOptionalString(context.sessionWorkspace ?? undefined);
  const workspaceApplies =
    Boolean(workspace) &&
    (claimKeySuggestsProjectScope(entry.claim_key, workspace) || entryContainsProjectSignal(entry, workspace));
  if (workspaceApplies) {
    return workspace;
  }

  const directoryProject = deriveWorkingDirectoryProject(context.workingDirectory);
  const directoryApplies = directoryProject && entryContainsProjectSignal(entry, directoryProject);
  return directoryApplies ? directoryProject : undefined;
}
|
|
996
|
+
/**
 * Tells whether a claim key's entity segment names the given project.
 *
 * The entity is the text before the first "/" in the claim key. Both sides are
 * compared after normalizeMetadataIdentifier.
 *
 * @param {string|null|undefined} claimKey - Claim key such as "entity/attribute".
 * @param {string} project - Project name to compare against.
 * @returns {boolean} True when both normalize to the same non-empty identifier.
 */
function claimKeySuggestsProjectScope(claimKey, project) {
  const entitySegment = claimKey?.split("/")[0];
  const entityId = normalizeMetadataIdentifier(entitySegment);
  const projectId = normalizeMetadataIdentifier(project);
  return Boolean(entityId) && Boolean(projectId) && entityId === projectId;
}
|
|
1004
|
+
/**
 * Derives a project identifier from the last segment of a working directory.
 *
 * @param {string|null|undefined} workingDirectory - Directory path.
 * @returns {string|undefined} The normalized basename, or `undefined` when the
 *   path is blank, normalizes to nothing, or is in IGNORED_PROJECT_DIRECTORY_NAMES.
 */
function deriveWorkingDirectoryProject(workingDirectory) {
  const trimmedDirectory = normalizeOptionalString(workingDirectory ?? undefined);
  if (trimmedDirectory) {
    const baseName = normalizeMetadataIdentifier(path.basename(trimmedDirectory));
    if (baseName && !IGNORED_PROJECT_DIRECTORY_NAMES.has(baseName)) {
      return baseName;
    }
  }
  return undefined;
}
|
|
1015
|
+
/**
 * Tells whether any single text field of an entry mentions every token of a project name.
 *
 * The subject, the source context, and each tag are checked independently; one
 * of them must contain all project tokens on its own.
 *
 * The project name is tokenized with the same tokenizer as the entry text
 * (lower-cased, split on non-alphanumerics). Previously it was only split on
 * "_", so a raw workspace name such as "Agenr-Core" could never match the
 * lower-cased entry tokens. Already-normalized names (lower-case, "_"-separated)
 * behave exactly as before.
 *
 * @param {{subject?: string, source_context?: string, tags?: string[]}} entry - Entry to inspect.
 * @param {string} project - Project name, normalized or raw.
 * @returns {boolean} True when some field contains every project token.
 */
function entryContainsProjectSignal(entry, project) {
  const projectTokens = [...tokenizeText(project)];
  if (projectTokens.length === 0) {
    return false;
  }
  const candidateTexts = [entry.subject, entry.source_context, ...(entry.tags ?? [])];
  return candidateTexts.some((text) => {
    const textTokens = tokenizeText(text);
    return projectTokens.every((token) => textTokens.has(token));
  });
}
|
|
1025
|
+
/**
 * Normalizes free text into a lower-case, underscore-separated identifier.
 *
 * Runs of characters outside [a-z0-9] become a single "_" and leading/trailing
 * underscores are stripped.
 *
 * @param {string|null|undefined} value - Raw text.
 * @returns {string|undefined} The identifier, or `undefined` when nothing is left.
 */
function normalizeMetadataIdentifier(value) {
  const base = normalizeOptionalString(value);
  if (base == null) {
    return undefined;
  }
  const slug = base
    .toLowerCase()
    .replace(/[^a-z0-9]+/gu, "_")
    .replace(/^_+|_+$/gu, "");
  return slug.length > 0 ? slug : undefined;
}
|
|
1029
|
+
/**
 * Splits text into a set of lower-case alphanumeric tokens.
 *
 * @param {string|null|undefined} value - Raw text; nullish is treated as "".
 * @returns {Set<string>} Distinct non-empty runs of [a-z0-9].
 */
function tokenizeText(value) {
  const tokens = new Set();
  // Every piece is a run of [a-z0-9], so no further trimming is needed.
  for (const piece of (value ?? "").toLowerCase().split(/[^a-z0-9]+/u)) {
    if (piece.length > 0) {
      tokens.add(piece);
    }
  }
  return tokens;
}
|
|
1034
|
+
/**
 * Trims an optional string and maps blank or missing input to `undefined`.
 *
 * @param {string|null|undefined} value - Raw value.
 * @returns {string|undefined} The trimmed string, or `undefined` when it is empty or nullish.
 */
function normalizeOptionalString(value) {
  const trimmed = value?.trim();
  if (!trimmed || !(trimmed.length > 0)) {
    return undefined;
  }
  return trimmed;
}
|
|
1038
|
+
|
|
1039
|
+
// src/core/claim-key-support.ts
|
|
1040
|
+
// Upper bound on "_"-separated attribute tokens for a claim key to count as compact
// (see evaluateClaimKeyCompactness).
var MAX_AUTO_APPLY_ATTRIBUTE_TOKENS = 4;

// Tokens dropped by tokenizeGroundingText before grounding comparisons.
var GROUNDING_STOP_TOKENS = /* @__PURE__ */ new Set([
  "a", "an", "and", "are", "as", "at", "be", "by",
  "for", "from", "how", "in", "into", "is", "it",
  "of", "on", "or", "our",
  "that", "the", "their", "this", "to",
  "we", "with"
]);

// Attribute tokens that make a claim key too awkward to count as compact
// (see evaluateClaimKeyCompactness).
var AWKWARD_AUTO_APPLY_ATTRIBUTE_TOKENS = /* @__PURE__ */ new Set([
  "to", "for", "from", "with", "about", "into", "onto", "between", "during"
]);

// Attribute token vocabularies; their consumers are outside this section.
var POLICY_TEMPLATE_ATTRIBUTE_TOKENS = /* @__PURE__ */ new Set([
  "policy", "default", "workflow", "process", "strategy", "guardrail", "rule", "boundary"
]);
var AUTHORITATIVE_TEMPLATE_ATTRIBUTE_TOKENS = /* @__PURE__ */ new Set([
  "source", "truth", "guide", "runbook", "reference"
]);
var ARCHITECTURE_TEMPLATE_ATTRIBUTE_TOKENS = /* @__PURE__ */ new Set([
  "adapter", "boundary", "architecture", "backend", "storage",
  "model", "support", "contract", "interface", "surface"
]);

// Attribute "head" tokens; the consumer is outside this section.
var STABLE_FAMILY_SLOT_ATTRIBUTE_HEADS = /* @__PURE__ */ new Set([
  "access", "boundary", "condition", "contract", "dependency",
  "mode", "owner", "order", "path", "policy",
  "preference", "preservation", "process", "requirement", "role",
  "rule", "schedule", "sequencing", "setting", "status",
  "strategy", "support", "surface", "timezone", "version",
  "window", "workflow", "workspace"
]);
|
|
1114
|
+
/**
 * Builds the trusted claim-key seed used when evaluating claim-key support.
 *
 * Only entries whose trimmed claim key passes isTrustedClaimKeyForCleanup and
 * can be normalized by inspectClaimKey are considered. Each distinct claim key
 * contributes ONE seed entry (the first one encountered); later duplicates only
 * update that key's statistics.
 *
 * Seed entries are ordered by claim key: usage count (desc), highest importance
 * (desc), latest created_at (desc), then claim key (asc).
 *
 * Entries are grouped by claim key in a Map while scanning, replacing the
 * earlier per-key filter over the whole entry list (quadratic in the number of
 * keys) without changing the output.
 *
 * @param {Array<object>} entries - Durable entries with `id`, `claim_key`,
 *   `importance`, `created_at`, `type`, `tags`, `source_context`, `subject`.
 * @returns {{entries: Array<object>}} Ordered trusted seed entries.
 */
function buildTrustedClaimKeySupportSeed(entries) {
  const claimKeyStats = /* @__PURE__ */ new Map();
  const entriesByClaimKey = /* @__PURE__ */ new Map();

  for (const entry of entries) {
    const claimKey = entry.claim_key?.trim();
    if (!claimKey || !isTrustedClaimKeyForCleanup(claimKey)) {
      continue;
    }
    const inspection = inspectClaimKey(claimKey);
    if (!inspection.normalized) {
      continue;
    }

    const existing = claimKeyStats.get(claimKey);
    if (existing) {
      // Repeat sighting: fold into the stats, but keep only the first seed entry.
      existing.count += 1;
      existing.maxImportance = Math.max(existing.maxImportance, entry.importance);
      existing.latestCreatedAt =
        existing.latestCreatedAt.localeCompare(entry.created_at) >= 0 ? existing.latestCreatedAt : entry.created_at;
      continue;
    }

    claimKeyStats.set(claimKey, {
      count: 1,
      maxImportance: entry.importance,
      latestCreatedAt: entry.created_at
    });

    const trustedEntry = {
      id: entry.id,
      claimKey: inspection.normalized.claimKey,
      entity: inspection.normalized.entity,
      attribute: inspection.normalized.attribute,
      type: entry.type,
      tags: normalizeGroundingTags(entry.tags),
      sourceContextTokens: tokenizeGroundingText(entry.source_context),
      subjectTokens: tokenizeGroundingText(entry.subject),
      createdAt: entry.created_at
    };
    // NOTE(review): stats are keyed by the raw trimmed key while seed entries carry the
    // normalized key; an entry is only emitted when some raw key equals its normalized key.
    const bucket = entriesByClaimKey.get(trustedEntry.claimKey);
    if (bucket) {
      bucket.push(trustedEntry);
    } else {
      entriesByClaimKey.set(trustedEntry.claimKey, [trustedEntry]);
    }
  }

  const orderedClaimKeys = [...claimKeyStats.entries()]
    .sort((left, right) => {
      const countDelta = right[1].count - left[1].count;
      if (countDelta !== 0) {
        return countDelta;
      }
      const importanceDelta = right[1].maxImportance - left[1].maxImportance;
      if (importanceDelta !== 0) {
        return importanceDelta;
      }
      const createdAtDelta = right[1].latestCreatedAt.localeCompare(left[1].latestCreatedAt);
      if (createdAtDelta !== 0) {
        return createdAtDelta;
      }
      return left[0].localeCompare(right[0]);
    })
    .map(([claimKey]) => claimKey);

  const newestFirstThenId = (left, right) => {
    const createdAtDelta = right.createdAt.localeCompare(left.createdAt);
    if (createdAtDelta !== 0) {
      return createdAtDelta;
    }
    return left.id.localeCompare(right.id);
  };

  const orderedEntries = orderedClaimKeys.flatMap((claimKey) => {
    const bucket = entriesByClaimKey.get(claimKey);
    return bucket ? [...bucket].sort(newestFirstThenId) : [];
  });

  return {
    entries: orderedEntries
  };
}
|
|
1178
|
+
/**
 * Builds a support seed from bare example claim keys.
 *
 * Keys that cannot be normalized, or whose normalized form fails
 * isTrustedClaimKeyForCleanup, are dropped. Each kept key becomes a synthetic
 * entry with id "example:<1-based position in the input>", no grounding data,
 * and an epoch timestamp.
 *
 * @param {string[]} claimKeys - Example claim keys.
 * @returns {{entries: Array<object>}} Synthetic seed entries in input order.
 */
function buildClaimKeySupportSeedFromExamples(claimKeys) {
  const entries = [];
  claimKeys.forEach((claimKey, index) => {
    const { normalized } = inspectClaimKey(claimKey);
    if (!normalized || !isTrustedClaimKeyForCleanup(normalized.claimKey)) {
      return;
    }
    entries.push({
      id: `example:${index + 1}`,
      claimKey: normalized.claimKey,
      entity: normalized.entity,
      attribute: normalized.attribute,
      tags: [],
      sourceContextTokens: [],
      subjectTokens: [],
      createdAt: "1970-01-01T00:00:00.000Z"
    });
  });
  return { entries };
}
|
|
1199
|
+
/**
 * Evaluates how well a proposed claim key for an entry is supported by trusted hints.
 *
 * Trusted entries are split into "exact reuse" (same claim key) and "family
 * reuse" (same entity, different claim key); the entry itself is excluded by id.
 * Each group is further narrowed to entries that share tag or source-context
 * grounding with the entry. Synthetic seed entries (id starting with
 * "example:") count as support even without grounding.
 *
 * @param {object} entry - Entry being keyed (`id`, `tags`, `source_context`, plus
 *   whatever the lexical/template helpers read).
 * @param {string} targetClaimKey - Proposed claim key.
 * @param {{entries: Array<object>}} trustedHints - Trusted seed entries.
 * @returns {object} Support evaluation; the empty evaluation when the key cannot be normalized.
 */
function evaluateClaimKeySupport(entry, targetClaimKey, trustedHints) {
  const { normalized } = inspectClaimKey(targetClaimKey);
  if (!normalized) {
    return createEmptyClaimKeySupportEvaluation();
  }

  const entryTagSet = new Set(normalizeGroundingTags(entry.tags));
  const entrySourceTokens = new Set(tokenizeGroundingText(entry.source_context));

  const groundingFor = (trustedEntry) => inspectGroundingOverlap(entryTagSet, entrySourceTokens, trustedEntry);
  const isGrounded = (trustedEntry) => {
    const grounding = groundingFor(trustedEntry);
    return grounding.tagGrounding || grounding.sourceContextGrounding;
  };
  const isExampleEntry = (candidate) => candidate.id.startsWith("example:");
  const idOf = (candidate) => candidate.id;

  // Partition the relevant trusted entries in one pass.
  const relevantEntries = [];
  const exactReuseEntries = [];
  const familyReuseEntries = [];
  for (const trustedEntry of trustedHints.entries) {
    if (entry.id && trustedEntry.id === entry.id) {
      continue;
    }
    if (trustedEntry.claimKey === normalized.claimKey) {
      relevantEntries.push(trustedEntry);
      exactReuseEntries.push(trustedEntry);
    } else if (trustedEntry.entity === normalized.entity) {
      relevantEntries.push(trustedEntry);
      familyReuseEntries.push(trustedEntry);
    }
  }

  const groundedExactReuseEntries = exactReuseEntries.filter(isGrounded);
  const groundedFamilyReuseEntries = familyReuseEntries.filter(isGrounded);
  const tagGrounding = relevantEntries.some((trustedEntry) => groundingFor(trustedEntry).tagGrounding);
  const sourceContextGrounding = relevantEntries.some(
    (trustedEntry) => groundingFor(trustedEntry).sourceContextGrounding
  );
  const localGrounding = tagGrounding || sourceContextGrounding;

  const lexicalAlignment = inspectCandidateLexicalAlignment(entry, normalized.entity, normalized.attribute);
  const templateSupport = matchesConservativeTemplateSupport(entry, normalized.attribute);
  const stableSlotSupport = matchesStableFamilySlotSupport(normalized.attribute);

  const hasExampleFamilyEntry = familyReuseEntries.some(isExampleEntry);
  const trustedExactReuse =
    exactReuseEntries.length > 0 && (groundedExactReuseEntries.length > 0 || exactReuseEntries.every(isExampleEntry));
  const trustedEntityFamilyReuse = groundedFamilyReuseEntries.length > 0 || hasExampleFamilyEntry;

  // Example-only exact reuse still counts as one supporting entry.
  const exactSupportCount = Math.max(1, groundedExactReuseEntries.length);
  const familySupportCount = Math.max(1, groundedFamilyReuseEntries.length || familyReuseEntries.length);

  let groundedFamilyCountForPromotion = 0;
  if (groundedFamilyReuseEntries.length > 0) {
    groundedFamilyCountForPromotion = groundedFamilyReuseEntries.length;
  } else if (hasExampleFamilyEntry) {
    groundedFamilyCountForPromotion = familyReuseEntries.length;
  }

  const promotionSupport = resolveClaimKeyPromotionSupport({
    exactReuseCount: trustedExactReuse ? exactSupportCount : 0,
    familyReuseCount: familyReuseEntries.length,
    groundedFamilyReuseCount: groundedFamilyCountForPromotion,
    localGrounding: localGrounding || hasExampleFamilyEntry,
    templateSupport,
    stableSlotSupport,
    lexicalAlignment
  });

  const supportedProposal =
    lexicalAlignment.any &&
    (templateSupport || stableSlotSupport || trustedExactReuse || trustedEntityFamilyReuse || localGrounding);

  const supportEvidence = [
    [trustedExactReuse, "trusted_exact_reuse"],
    [trustedEntityFamilyReuse, "trusted_entity_family_reuse"],
    [tagGrounding, "tag_grounding"],
    [sourceContextGrounding, "source_context_grounding"],
    [lexicalAlignment.entity, "entity_lexical_alignment"],
    [lexicalAlignment.attribute, "attribute_lexical_alignment"],
    [lexicalAlignment.strongEntityAttribute, "strong_entity_attribute_lexical_alignment"],
    [templateSupport, "template_support"],
    [stableSlotSupport, "stable_slot_support"],
    [promotionSupport.relaxedStableSlotFamilyGate, "single_grounded_family_sibling"]
  ]
    .filter(([present]) => present)
    .map(([, label]) => label);

  // Slot alignment is only mentioned when the stronger combined alignment is absent;
  // entity alignment is only mentioned when there is no slot alignment at all.
  let alignmentFragment = null;
  if (lexicalAlignment.attribute) {
    if (!lexicalAlignment.strongEntityAttribute) {
      alignmentFragment = "clear lexical alignment to the proposed slot";
    }
  } else if (lexicalAlignment.entity) {
    alignmentFragment = "clear lexical alignment to the proposed entity";
  }

  const rationaleFragments = [
    trustedExactReuse
      ? `trusted exact reuse from ${exactSupportCount} matching entr${exactSupportCount === 1 ? "y" : "ies"}`
      : null,
    trustedEntityFamilyReuse
      ? `trusted ${normalized.entity} family reuse from ${familySupportCount} supporting entr${familySupportCount === 1 ? "y" : "ies"}`
      : null,
    tagGrounding ? "overlapping tags with trusted corpus entries" : null,
    sourceContextGrounding ? "overlapping source_context with trusted corpus entries" : null,
    lexicalAlignment.strongEntityAttribute ? "strong entity and slot lexical alignment" : null,
    alignmentFragment,
    templateSupport ? "a conservative policy/default/source-of-truth template match" : null,
    stableSlotSupport ? "a stable compact slot head in a well-established entity family" : null,
    promotionSupport.relaxedStableSlotFamilyGate ? "one grounded family sibling cleared the stable-slot family gate" : null
  ].filter((fragment) => fragment !== null);

  const supportingDurableIds = normalizeStringArray2([
    ...groundedExactReuseEntries.map(idOf),
    ...groundedFamilyReuseEntries.map(idOf),
    ...familyReuseEntries.filter(isExampleEntry).map(idOf)
  ]);

  return {
    autoApplyClass: promotionSupport.autoApplyClass,
    supportedProposal,
    trustedExactReuse,
    trustedEntityFamilyReuse,
    tagGrounding,
    sourceContextGrounding,
    localGrounding,
    entityLexicalAlignment: lexicalAlignment.entity,
    attributeLexicalAlignment: lexicalAlignment.attribute,
    strongEntityAttributeLexicalAlignment: lexicalAlignment.strongEntityAttribute,
    lexicalAlignment: lexicalAlignment.any,
    templateSupport,
    stableSlotSupport,
    familyReuseCount: familyReuseEntries.length,
    groundedFamilyReuseCount: groundedFamilyReuseEntries.length,
    relaxedStableSlotFamilyGate: promotionSupport.relaxedStableSlotFamilyGate,
    supportingDurableIds,
    supportEvidence,
    rationaleFragments
  };
}
|
|
1294
|
+
/**
 * Creates the "no support" evaluation returned when a claim key cannot be normalized.
 *
 * @returns {object} Evaluation with a null auto-apply class, every flag false,
 *   zero counts, and fresh empty arrays.
 */
function createEmptyClaimKeySupportEvaluation() {
  const evaluation = { autoApplyClass: null };

  // Property insertion order is part of the result shape; keep it stable.
  const leadingFlags = [
    "supportedProposal",
    "trustedExactReuse",
    "trustedEntityFamilyReuse",
    "tagGrounding",
    "sourceContextGrounding",
    "localGrounding",
    "entityLexicalAlignment",
    "attributeLexicalAlignment",
    "strongEntityAttributeLexicalAlignment",
    "lexicalAlignment",
    "templateSupport",
    "stableSlotSupport"
  ];
  for (const flag of leadingFlags) {
    evaluation[flag] = false;
  }

  evaluation.familyReuseCount = 0;
  evaluation.groundedFamilyReuseCount = 0;
  evaluation.relaxedStableSlotFamilyGate = false;
  evaluation.supportingDurableIds = [];
  evaluation.supportEvidence = [];
  evaluation.rationaleFragments = [];
  return evaluation;
}
|
|
1317
|
+
/**
 * Compacts a claim key and reports whether the result counts as compact enough
 * for auto-apply.
 *
 * A key counts as compact when its attribute has between 1 and
 * MAX_AUTO_APPLY_ATTRIBUTE_TOKENS "_"-separated tokens and none of them is in
 * AWKWARD_AUTO_APPLY_ATTRIBUTE_TOKENS.
 *
 * @param {string} claimKey - Claim key to compact.
 * @param {{priorCompactedFrom?: (string|null), priorCompactionReason?: (string|null)}} [prior] -
 *   Provenance from an earlier compaction, merged into the result.
 * @returns {{claimKey: string, compactedFrom: (string|null), compactionReason: (string|null), compactEnoughForAutoApply: boolean, blockerReason: (string|null)}}
 */
function evaluateClaimKeyCompactness(claimKey, prior) {
  const compacted = compactClaimKey(claimKey);
  if (!compacted) {
    return {
      claimKey,
      compactedFrom: null,
      compactionReason: null,
      compactEnoughForAutoApply: false,
      blockerReason: "invalid_claim_key"
    };
  }

  const attributeTokens = compacted.attribute.split("_").filter((token) => token.length > 0);
  const tokenCountOk = attributeTokens.length > 0 && attributeTokens.length <= MAX_AUTO_APPLY_ATTRIBUTE_TOKENS;
  const compactEnoughForAutoApply =
    tokenCountOk && !attributeTokens.some((token) => AWKWARD_AUTO_APPLY_ATTRIBUTE_TOKENS.has(token));

  const priorReason = prior?.priorCompactionReason;
  let compactionReason;
  if (compacted.reason && priorReason) {
    // Both passes changed the key: chain the explanations, earlier one first.
    compactionReason = `${prior.priorCompactionReason} and ${compacted.reason}`;
  } else {
    compactionReason = compacted.reason ?? priorReason ?? null;
  }

  return {
    claimKey: compacted.claimKey,
    compactedFrom: compacted.compactedFrom ?? prior?.priorCompactedFrom ?? null,
    compactionReason,
    compactEnoughForAutoApply,
    blockerReason: compactEnoughForAutoApply ? null : "non_compact_canonical_slot"
  };
}
|
|
1340
|
+
/**
 * Normalizes each tag with `normalizeClaimKeySegment`, drops tags that
 * normalize to an empty string, and de-duplicates the remainder.
 *
 * @param {string[] | null | undefined} tags - Raw tags; nullish is treated as no tags.
 * @returns {string[]} Normalized tags as returned by `normalizeStringArray2`.
 */
function normalizeGroundingTags(tags) {
  const rawTags = tags ?? [];
  const segments = rawTags.map((tag) => normalizeClaimKeySegment(tag));
  const nonEmptySegments = segments.filter((segment) => segment.length > 0);
  return normalizeStringArray2(nonEmptySegments);
}
|
|
1343
|
+
/**
 * Splits free text into normalized grounding tokens.
 *
 * The text is split on runs of non-alphanumeric ASCII characters, each piece
 * is normalized with `normalizeClaimKeySegment`, and tokens of two characters
 * or fewer or listed in `GROUNDING_STOP_TOKENS` are discarded.
 *
 * @param {string | null | undefined} value - Text to tokenize; falsy yields `[]`.
 * @returns {string[]} De-duplicated tokens as returned by `normalizeStringArray2`.
 */
function tokenizeGroundingText(value) {
  if (!value) {
    return [];
  }
  const isGroundingToken = (token) => token.length > 2 && !GROUNDING_STOP_TOKENS.has(token);
  const pieces = value.split(/[^a-zA-Z0-9]+/u);
  const normalizedPieces = pieces.map((piece) => normalizeClaimKeySegment(piece));
  return normalizeStringArray2(normalizedPieces.filter((token) => isGroundingToken(token)));
}
|
|
1351
|
+
/**
 * Collects the lexical tokens of one entry: tokenized subject, content and
 * source context, followed by its normalized tags, de-duplicated in that order.
 *
 * @param {{ subject?: string, content?: string, source_context?: string, tags?: string[] }} entry
 * @returns {string[]} Combined token list as returned by `normalizeStringArray2`.
 */
function buildDurableLocalLexicalTokens(entry) {
  const subjectTokens = tokenizeGroundingText(entry.subject);
  const contentTokens = tokenizeGroundingText(entry.content);
  const sourceContextTokens = tokenizeGroundingText(entry.source_context);
  const tagTokens = normalizeGroundingTags(entry.tags);
  return normalizeStringArray2([...subjectTokens, ...contentTokens, ...sourceContextTokens, ...tagTokens]);
}
|
|
1359
|
+
/**
 * Picks the auto-apply promotion class supported by the given evidence.
 *
 * Rules are tried in order and the first match wins:
 *   1. exact reuse plus attribute alignment or template support;
 *   2. template support, local grounding, family reuse and any entity or
 *      attribute alignment;
 *   3. stable-slot support with local grounding, grounded family reuse and
 *      attribute alignment, needing either two or more family reuses or the
 *      relaxed gate (exactly one reuse plus strong entity/attribute alignment);
 *   4. local grounding, grounded family reuse and strong alignment.
 *
 * @param {{
 *   exactReuseCount: number,
 *   familyReuseCount: number,
 *   groundedFamilyReuseCount: number,
 *   templateSupport: boolean,
 *   stableSlotSupport: boolean,
 *   localGrounding: boolean,
 *   lexicalAlignment: { entity: boolean, attribute: boolean, strongEntityAttribute: boolean }
 * }} input
 * @returns {{ autoApplyClass: string | null, relaxedStableSlotFamilyGate: boolean }}
 */
function resolveClaimKeyPromotionSupport(input) {
  const promotion = (autoApplyClass, relaxedStableSlotFamilyGate) => ({
    autoApplyClass,
    relaxedStableSlotFamilyGate
  });

  const hasExactReuse = input.exactReuseCount > 0;
  if (hasExactReuse && (input.lexicalAlignment.attribute || input.templateSupport)) {
    return promotion("trusted_exact_reuse_grounded", false);
  }

  const templateGrounded = input.templateSupport && input.localGrounding && input.familyReuseCount > 0;
  if (templateGrounded && (input.lexicalAlignment.attribute || input.lexicalAlignment.entity)) {
    return promotion("trusted_family_template_grounded", false);
  }

  // Preconditions shared by the stable-slot rule and its relaxed gate.
  const stableSlotGrounded = input.stableSlotSupport && input.localGrounding && input.groundedFamilyReuseCount > 0;
  const relaxedStableSlotFamilyGate = stableSlotGrounded && input.familyReuseCount === 1 && input.lexicalAlignment.strongEntityAttribute;
  const enoughFamilyReuse = input.familyReuseCount >= 2 || relaxedStableSlotFamilyGate;
  if (stableSlotGrounded && enoughFamilyReuse && input.lexicalAlignment.attribute) {
    return promotion("trusted_family_stable_slot", relaxedStableSlotFamilyGate);
  }

  const groundedFamily = input.localGrounding && input.groundedFamilyReuseCount > 0;
  if (groundedFamily && input.lexicalAlignment.strongEntityAttribute) {
    return promotion("trusted_family_grounded_alignment", false);
  }

  return promotion(null, false);
}
|
|
1390
|
+
/**
 * Reports whether an entry shares at least one tag and at least one
 * source-context token with a trusted entry.
 *
 * @param {{ has(value: string): boolean }} entryTagSet - Tags of the entry under inspection.
 * @param {{ has(value: string): boolean }} entrySourceTokens - Source-context tokens of that entry.
 * @param {{ tags: Iterable<string>, sourceContextTokens: Iterable<string> }} trustedEntry
 * @returns {{ tagGrounding: boolean, sourceContextGrounding: boolean }}
 */
function inspectGroundingOverlap(entryTagSet, entrySourceTokens, trustedEntry) {
  const sharedTagCount = countSetOverlap(entryTagSet, trustedEntry.tags);
  const sharedSourceTokenCount = countSetOverlap(entrySourceTokens, trustedEntry.sourceContextTokens);
  return {
    tagGrounding: sharedTagCount > 0,
    sourceContextGrounding: sharedSourceTokenCount > 0
  };
}
|
|
1396
|
+
/**
 * Measures how many tokens of a candidate entity and attribute also appear in
 * the entry's own lexical tokens (see `buildDurableLocalLexicalTokens`).
 *
 * Attribute tokens listed in `GROUNDING_STOP_TOKENS` are ignored. Attribute
 * alignment counts as "strong" when every attribute token overlaps, or at
 * least two do for longer attributes.
 *
 * @param {object} entry - Entry whose subject/content/source context/tags provide the tokens.
 * @param {string} entity - Snake_case entity segment of the candidate claim key.
 * @param {string} attribute - Snake_case attribute segment of the candidate claim key.
 * @returns {{
 *   entity: boolean,
 *   attribute: boolean,
 *   any: boolean,
 *   strongEntityAttribute: boolean,
 *   entityOverlapCount: number,
 *   attributeOverlapCount: number
 * }}
 */
function inspectCandidateLexicalAlignment(entry, entity, attribute) {
  const splitSegment = (segment) => segment.split("_").filter((token) => token.length > 0);

  const lexicalTokens = new Set(buildDurableLocalLexicalTokens(entry));
  const entityTokens = splitSegment(entity);
  const attributeTokens = splitSegment(attribute).filter((token) => !GROUNDING_STOP_TOKENS.has(token));

  const entityOverlapCount = countSetOverlap(lexicalTokens, entityTokens);
  const attributeOverlapCount = countSetOverlap(lexicalTokens, attributeTokens);

  const entityAlignment = entityOverlapCount > 0;
  const attributeAlignment = attributeOverlapCount > 0;
  const requiredAttributeOverlap = Math.min(attributeTokens.length, 2);
  const strongAttributeAlignment = attributeTokens.length > 0 && attributeOverlapCount >= requiredAttributeOverlap;

  return {
    entity: entityAlignment,
    attribute: attributeAlignment,
    any: entityAlignment || attributeAlignment,
    strongEntityAttribute: entityAlignment && strongAttributeAlignment,
    entityOverlapCount,
    attributeOverlapCount
  };
}
|
|
1414
|
+
/**
 * Checks whether the entry text reads like one of three known templates
 * (authoritative source, policy, architecture) AND the attribute contains a
 * token from that template's attribute-token set.
 *
 * The lowercased subject and content are joined with a newline before
 * matching, so a phrase cannot match across the subject/content boundary.
 *
 * @param {{ subject: string, content: string }} entry
 * @param {string} attribute - Snake_case attribute segment of the candidate claim key.
 * @returns {boolean} `true` when any template matches both the text and the attribute.
 */
function matchesConservativeTemplateSupport(entry, attribute) {
  const attributeTokens = new Set(attribute.split("_").filter((token) => token.length > 0));
  const combinedText = `${entry.subject.toLowerCase()}\n${entry.content.toLowerCase()}`;

  // Each template pairs a text pattern with the attribute tokens it may vouch for.
  const templates = [
    [
      /\b(authoritative|source of truth|source of record|canonical guide|canonical reference|primary guide|runbook)\b/u,
      AUTHORITATIVE_TEMPLATE_ATTRIBUTE_TOKENS
    ],
    [
      /\b(should|must|should stay|must stay|always|never|default(?:s)? to|default(?:s)?|policy|guardrail|required|preference|prefers?)\b/u,
      POLICY_TEMPLATE_ATTRIBUTE_TOKENS
    ],
    [
      /\b(uses|supports|backed by|architecture|boundary|workflow|process|pipeline|adapter|layer|contract|interface|surface)\b/u,
      ARCHITECTURE_TEMPLATE_ATTRIBUTE_TOKENS
    ]
  ];

  return templates.some(([textPattern, slotTokens]) => textPattern.test(combinedText) && intersects(attributeTokens, slotTokens));
}
|
|
1435
|
+
/**
 * Checks whether an attribute ends in a recognized stable slot head.
 *
 * The attribute must have between one and `MAX_AUTO_APPLY_ATTRIBUTE_TOKENS`
 * non-empty snake_case tokens, and its last token must be a member of
 * `STABLE_FAMILY_SLOT_ATTRIBUTE_HEADS`.
 *
 * @param {string} attribute - Snake_case attribute segment.
 * @returns {boolean}
 */
function matchesStableFamilySlotSupport(attribute) {
  const tokens = attribute.split("_").filter(Boolean);
  const tokenCount = tokens.length;
  if (tokenCount === 0) {
    return false;
  }
  if (tokenCount > MAX_AUTO_APPLY_ATTRIBUTE_TOKENS) {
    return false;
  }
  // The trailing token is treated as the head of the slot name.
  const head = tokens[tokenCount - 1];
  return typeof head === "string" && STABLE_FAMILY_SLOT_ATTRIBUTE_HEADS.has(head);
}
|
|
1443
|
+
/**
 * Counts how many values yielded by `right` are present in `left`.
 * Duplicates in `right` are counted once per occurrence.
 *
 * @param {{ has(value: unknown): boolean }} left - Membership source (for example a Set).
 * @param {Iterable<unknown>} right - Values to look up.
 * @returns {number}
 */
function countSetOverlap(left, right) {
  return [...right].filter((value) => left.has(value)).length;
}
|
|
1452
|
+
/**
 * Tells whether any value yielded by `left` is present in `right`.
 *
 * @param {Iterable<unknown>} left - Values to look up.
 * @param {{ has(value: unknown): boolean }} right - Membership source (for example a Set).
 * @returns {boolean}
 */
function intersects(left, right) {
  return [...left].some((value) => right.has(value));
}
|
|
1460
|
+
/**
 * Removes falsy values and duplicates while keeping first-seen order.
 *
 * @param {Iterable<string | null | undefined>} values
 * @returns {string[]} A new array; the input is not modified.
 */
function normalizeStringArray2(values) {
  // A Set iterates in insertion order, so first occurrences keep their position.
  const unique = /* @__PURE__ */ new Set();
  for (const value of values) {
    if (value) {
      unique.add(value);
    }
  }
  return [...unique];
}
|
|
1472
|
+
|
|
1473
|
+
// src/core/store/claim-extraction.ts
|
|
1474
|
+
// Normalized entity names that point back at the speaker, team or current
// project instead of naming a concrete entity.
// NOTE(review): the consumers of these three sets are outside this chunk - confirm usage there.
var SELF_REFERENTIAL_ENTITIES = /* @__PURE__ */ new Set([
  "i",
  "me",
  "the_user",
  "myself",
  "user",
  "we",
  "our_team",
  "the_project",
  "this_project"
]);
// Subset of the self-referential names that denote the user.
var USER_REFERENTIAL_ENTITIES = /* @__PURE__ */ new Set(["i", "me", "myself", "the_user", "user"]);
// Subset of the self-referential names that denote the project.
var PROJECT_REFERENTIAL_ENTITIES = /* @__PURE__ */ new Set(["the_project", "this_project"]);
// Attribute head words accepted by the deterministic path.
// NOTE(review): presumably consumed by tryDeterministicClaimKeyRepair - verify.
var DETERMINISTIC_ATTRIBUTE_HEADS = /* @__PURE__ */ new Set([
  "budget",
  "city",
  "config",
  "deadline",
  "email",
  "employer",
  "language",
  "limit",
  "location",
  "mode",
  "model",
  "name",
  "owner",
  "plan",
  "policy",
  "preference",
  "priority",
  "quota",
  "region",
  "role",
  "schedule",
  "setting",
  "status",
  "strategy",
  "team",
  "theme",
  "timezone",
  "version",
  "window"
]);
// Size caps for hint/example lists. NOTE(review): applied outside this chunk - confirm.
var MAX_ENTITY_HINTS = 12;
var MAX_CLAIM_KEY_EXAMPLES = 8;
var MAX_SUPPORT_CLAIM_KEY_EXAMPLES = 128;
// NOTE(review): presumably the confidence assigned to deterministic repairs - verify.
var DEFAULT_REPAIR_CONFIDENCE = 0.86;
// Auto-apply threshold used by extractClaimKeyDecision when a candidate has no promotion support.
var HIGH_CONFIDENCE_BACKFILL_THRESHOLD = 0.92;
// Auto-apply threshold for candidates that have a promotion class and were not compacted.
var SUPPORTED_INGEST_AUTO_APPLY_THRESHOLD = 0.72;
// Auto-apply threshold for candidates that have a promotion class and were compacted.
var COMPACTED_SUPPORTED_INGEST_AUTO_APPLY_THRESHOLD = 0.74;
// Minimum confidence for a rejected candidate to be surfaced as reviewable.
var PROPOSAL_CONFIDENCE_THRESHOLD = 0.75;
// Reviewable threshold when the support evaluation flags a supported proposal.
var SUPPORTED_PROPOSAL_CONFIDENCE_THRESHOLD = 0.65;
|
|
1517
|
+
/**
 * Builds a claim-key lifecycle for an extraction result and, when one is
 * produced, applies it to the entry via `applyClaimKeyLifecycle`.
 *
 * @param {object} entry - Entry to update; also used to build the support context.
 * @param {object} extracted - Accepted claim extraction result.
 * @returns {void}
 */
function applyClaimExtractionResultToEntry(entry, extracted) {
  const supportContext = buildInferredIngestClaimKeySupportContext(entry);
  const lifecycle = buildExtractedClaimKeyLifecycle(extracted, supportContext);
  if (lifecycle) {
    applyClaimKeyLifecycle(entry, lifecycle);
  }
}
|
|
1524
|
+
/**
 * Runs claim-key extraction for one entry and returns the candidate without
 * applying any confidence threshold.
 *
 * Flow: skip disabled/ineligible entries, ask the model, and fall back to the
 * deterministic repair when the model call throws or the model's candidate is
 * rejected. `options.onPreviewOutcome` is notified for `no_claim`,
 * `candidate` and `rejected_candidate` outcomes.
 *
 * @param {{ type: string, subject: string, content: string }} entry
 * @param {object} llm - Client handed to `attemptClaimExtraction`.
 * @param {{ enabled: boolean, eligibleTypes: string[] }} config
 * @param {{ hints?: object, onWarning?: Function, onPreviewOutcome?: Function }} [options]
 * @returns {Promise<object | null>} The candidate (or deterministic repair), or `null`.
 * @throws Rethrows the model error when no deterministic repair is available.
 */
async function previewClaimKeyExtraction(entry, llm, config, options = {}) {
  const eligible = config.enabled && config.eligibleTypes.includes(entry.type);
  if (!eligible) {
    return null;
  }

  const normalizedHints = normalizeClaimExtractionHints(options.hints ?? {});

  let attempt;
  try {
    attempt = await attemptClaimExtraction(entry, normalizedHints, llm);
  } catch (error) {
    // The model call failed: only a deterministic repair can still answer.
    const fallback = tryDeterministicClaimKeyRepair(entry, normalizedHints);
    if (!fallback) {
      throw error;
    }
    return fallback;
  }

  if (attempt.response.no_claim === true) {
    options.onPreviewOutcome?.(buildPreviewOutcome("no_claim", attempt));
    return null;
  }

  const candidate = buildClaimExtractionCandidate(entry, attempt.response, normalizedHints, options.onWarning);
  if (!candidate) {
    options.onPreviewOutcome?.(buildPreviewOutcome("rejected_candidate", attempt));
    return tryDeterministicClaimKeyRepair(entry, normalizedHints);
  }

  options.onPreviewOutcome?.({
    outcome: "candidate",
    confidence: candidate.confidence,
    rawEntity: candidate.rawEntity,
    rawAttribute: candidate.rawAttribute,
    path: attempt.path
  });

  const preview = {
    claimKey: candidate.claimKey,
    confidence: candidate.confidence,
    rawEntity: candidate.rawEntity,
    rawAttribute: candidate.rawAttribute,
    path: attempt.path
  };
  if (candidate.compactedFrom) {
    // Compaction details are only reported when the key was actually compacted.
    preview.compactedFrom = candidate.compactedFrom;
    preview.compactionReason = candidate.compactionReason;
  }
  return preview;
}
|
|
1567
|
+
/**
 * Decides whether a claim key can be attached to an entry and explains why.
 *
 * Decision order, as implemented below:
 *   1. disabled config / ineligible type -> `ineligible_type`;
 *   2. model call fails -> deterministic repair if available, otherwise
 *      `extraction_failure` (also reported through `options.onWarning`);
 *   3. model answers `no_claim` -> `no_claim`;
 *   4. candidate cannot be built -> deterministic repair, otherwise
 *      `rejected_candidate`;
 *   5. confidence meets `config.confidenceThreshold` -> accepted;
 *   6. confidence meets the support-dependent auto-apply threshold and the key
 *      is compact enough -> accepted with an `acceptanceRationale`;
 *   7. a deterministic repair that agrees with the candidate -> repair decision;
 *   8. otherwise `low_confidence_candidate`, marked reviewable when the
 *      confidence meets the proposal threshold.
 *
 * @param {{ type: string, subject: string, content: string, tags?: string[], source_context?: string }} entry
 * @param {object} llm - Client handed to `attemptClaimExtraction`.
 * @param {{ enabled: boolean, eligibleTypes: string[], confidenceThreshold: number }} config
 * @param {{
 *   hints?: object,
 *   onWarning?: (warning: string) => void,
 *   supportClaimKeys?: string[],
 *   entityPrefixStats?: unknown
 * }} [options]
 * @returns {Promise<{ result: object | null, diagnostic: object }>}
 */
async function extractClaimKeyDecision(entry, llm, config, options = {}) {
  // Every non-accepted outcome shares the same `result: null` envelope.
  const withoutResult = (diagnostic) => ({ result: null, diagnostic });

  const eligible = config.enabled && config.eligibleTypes.includes(entry.type);
  if (!eligible) {
    return withoutResult({
      outcome: "ineligible_type",
      confidence: null,
      path: null,
      warning: null,
      suggestedClaimKey: null,
      reviewable: false,
      supportEvidence: [],
      rationale: "entry type is not eligible for claim-key extraction"
    });
  }

  const normalizedHints = normalizeClaimExtractionHints(options.hints ?? {});
  const attemptRepair = () => tryDeterministicClaimKeyRepair(entry, normalizedHints);
  const finalizeRepair = (repaired) => finalizeDeterministicRepairDecision(repaired, options.entityPrefixStats);

  let attempt;
  try {
    attempt = await attemptClaimExtraction(entry, normalizedHints, llm);
  } catch (error) {
    const failureRepair = attemptRepair();
    if (failureRepair) {
      return finalizeRepair(failureRepair);
    }
    const warning = formatClaimExtractionError(error);
    options.onWarning?.(`Claim extraction failed for "${entry.subject}": ${warning}`);
    return withoutResult({
      outcome: "extraction_failure",
      confidence: null,
      path: null,
      warning,
      suggestedClaimKey: null,
      reviewable: false,
      supportEvidence: [],
      rationale: "claim extraction failed before a safe candidate could be produced"
    });
  }

  if (attempt.response.no_claim === true) {
    return withoutResult({
      outcome: "no_claim",
      confidence: normalizeConfidence(attempt.response.confidence),
      path: attempt.path,
      warning: null,
      suggestedClaimKey: null,
      reviewable: false,
      supportEvidence: [],
      rationale: "model explicitly returned no_claim"
    });
  }

  // Warnings are both forwarded to the caller and kept for the diagnostic.
  const warnings = [];
  const collectWarning = (warning) => {
    warnings.push(warning);
    options.onWarning?.(warning);
  };
  const candidate = buildClaimExtractionCandidate(entry, attempt.response, normalizedHints, collectWarning);
  if (!candidate) {
    const rejectionRepair = attemptRepair();
    if (rejectionRepair) {
      return finalizeRepair(rejectionRepair);
    }
    return withoutResult({
      outcome: "rejected_candidate",
      confidence: normalizeConfidence(attempt.response.confidence),
      path: attempt.path,
      warning: warnings[0] ?? null,
      suggestedClaimKey: null,
      reviewable: false,
      supportEvidence: [],
      rationale: "model proposed a structurally unsafe or non-canonical claim key"
    });
  }

  const result = toClaimExtractionResult(candidate, attempt.path);
  if (result.confidence >= config.confidenceThreshold) {
    return {
      result,
      diagnostic: buildAcceptedDiagnostic(result, "candidate met the ingest confidence threshold")
    };
  }

  // Below the configured threshold: look for structured support that justifies
  // a lower, more conservative auto-apply bar.
  const support = evaluateClaimKeySupport(
    {
      subject: entry.subject,
      content: entry.content,
      type: entry.type,
      tags: entry.tags,
      source_context: entry.source_context
    },
    result.claimKey ?? "",
    buildClaimKeySupportSeedFromExamples(options.supportClaimKeys ?? [])
  );
  const compactness = evaluateClaimKeyCompactness(result.claimKey ?? "", {
    priorCompactedFrom: result.compactedFrom ?? null,
    priorCompactionReason: result.compactionReason ?? null
  });

  const hasPromotionSupport = support.autoApplyClass !== null;
  let autoApplyThreshold = HIGH_CONFIDENCE_BACKFILL_THRESHOLD;
  if (hasPromotionSupport) {
    autoApplyThreshold = compactness.compactedFrom ? COMPACTED_SUPPORTED_INGEST_AUTO_APPLY_THRESHOLD : SUPPORTED_INGEST_AUTO_APPLY_THRESHOLD;
  }
  const proposalThreshold = support.supportedProposal ? SUPPORTED_PROPOSAL_CONFIDENCE_THRESHOLD : PROPOSAL_CONFIDENCE_THRESHOLD;

  if (compactness.claimKey !== result.claimKey) {
    // Adopt the compacted key so later comparisons and diagnostics use it.
    result.claimKey = compactness.claimKey;
    result.compactedFrom = compactness.compactedFrom;
    result.compactionReason = compactness.compactionReason;
  }

  if (result.confidence >= autoApplyThreshold && compactness.compactEnoughForAutoApply) {
    result.acceptanceRationale = hasPromotionSupport
      ? `accepted below the default threshold via ${describeSupportPromotionClass(support)}`
      : "accepted as a high-confidence preview";
    const acceptedRationale = hasPromotionSupport
      ? `supported near-miss candidate cleared the conservative auto-apply threshold via ${describeSupportPromotionClass(support)}`
      : `candidate cleared the conservative high-confidence threshold of ${autoApplyThreshold.toFixed(2)}`;
    return {
      result,
      diagnostic: buildAcceptedDiagnostic(result, acceptedRationale)
    };
  }

  const repaired = attemptRepair();
  if (repaired && (!result.claimKey || repaired.claimKey === result.claimKey)) {
    return finalizeRepair(repaired);
  }

  const reviewable = result.confidence >= proposalThreshold;
  let rationale;
  if (!reviewable) {
    rationale = "candidate stayed below both the conservative auto-apply and review thresholds";
  } else if (support.rationaleFragments.length > 0) {
    rationale = `candidate stayed below the auto-apply threshold but has structured support from ${support.rationaleFragments.join(", ")}`;
  } else {
    rationale = `candidate stayed below the auto-apply threshold of ${autoApplyThreshold.toFixed(2)}`;
  }
  return withoutResult({
    outcome: "low_confidence_candidate",
    confidence: result.confidence,
    path: result.path,
    warning: warnings[0] ?? null,
    suggestedClaimKey: result.claimKey,
    reviewable,
    supportEvidence: support.supportEvidence,
    rationale
  });
}
|
|
1719
|
+
/**
 * Loads the distinct claim-key prefixes known to the database port.
 *
 * @param {{ getDistinctClaimKeyPrefixes(): unknown }} db
 * @returns {Promise<unknown>} Whatever `db.getDistinctClaimKeyPrefixes()` resolves to.
 */
async function getEntityHints(db) {
  const prefixes = await db.getDistinctClaimKeyPrefixes();
  return prefixes;
}
|
|
1722
|
+
/**
 * Extracts claim keys for every eligible entry in a batch of results.
 *
 * Entries are processed in stages of `concurrency` entries. Hint snapshots for
 * a stage are taken before that stage runs, so keys accepted in one stage are
 * visible as hints to later stages. Entries that end the primary pass without
 * a key are retried once, but only when at least one key was accepted overall.
 * Accepted keys are applied to the entry objects in place.
 *
 * @param {{ entries: object[] }[]} results - Batch results whose entries are processed in order.
 * @param {{ db: object, createLlm(): object }} ports
 * @param {{ enabled: boolean, eligibleTypes: string[] }} config
 * @param {number} [concurrency=10] - Stage size; normalized by `normalizeClaimExtractionConcurrency`.
 * @param {(warning: string) => void} [onWarning]
 * @param {(entry: object, diagnostic: object) => void} [onDiagnostic] - Called once per entry
 *   that has a diagnostic, after all stages finished, in input order.
 * @param {(progress: { phase: string, completedEntries: number, totalEntries: number, totalEligibleEntries: number }) => void} [onProgress]
 * @returns {Promise<Map<object, object>>} Map from entry to its accepted extraction result.
 */
async function runBatchClaimExtraction(results, ports, config, concurrency = 10, onWarning, onDiagnostic, onProgress) {
  if (!config.enabled) {
    return /* @__PURE__ */ new Map();
  }

  const hintState = await loadClaimExtractionHintState(ports.db);
  const llm = ports.createLlm();
  const extractedEntries = /* @__PURE__ */ new Map();
  const diagnostics = /* @__PURE__ */ new Map();
  const retryEntries = [];
  const stageSize = normalizeClaimExtractionConcurrency(concurrency);
  const orderedEntries = results.flatMap((result) => result.entries);
  const hasEligibleType = (entry) => config.eligibleTypes.includes(entry.type);
  const totalEligibleEntries = orderedEntries.filter((entry) => !entry.claim_key && hasEligibleType(entry)).length;

  const toStageRequest = (entry) => ({
    entry,
    hintSnapshot: buildClaimExtractionHintSnapshot(hintState, entry)
  });

  // Records the diagnostic and, when a key was accepted, applies it and feeds
  // it back into the hint state. Returns whether a key was accepted.
  const commitDecision = (entry, decision) => {
    diagnostics.set(entry, decision.diagnostic);
    if (!decision.result?.claimKey) {
      return false;
    }
    applyClaimExtractionResultToEntry(entry, decision.result);
    recordClaimKeyHint(hintState, decision.result.claimKey);
    extractedEntries.set(entry, decision.result);
    return true;
  };

  // Runs one stage and relays per-entry progress for the given phase.
  const runStage = (stageRequests, phase, completedSoFar, stageTotal, rememberCompleted) =>
    executeClaimExtractionStageRequests(stageRequests, llm, config, onWarning, completedSoFar, stageTotal, (completedEntries, totalEntries) => {
      rememberCompleted(completedEntries);
      onProgress?.({
        phase,
        completedEntries,
        totalEntries,
        totalEligibleEntries
      });
    });

  let completedPrimaryEntries = 0;
  for (let stageStart = 0; stageStart < orderedEntries.length; stageStart += stageSize) {
    const stageRequests = [];
    for (const entry of orderedEntries.slice(stageStart, stageStart + stageSize)) {
      if (entry.claim_key) {
        // Already keyed: it only contributes a hint for later entries.
        recordClaimKeyHint(hintState, entry.claim_key);
        continue;
      }
      if (!hasEligibleType(entry)) {
        diagnostics.set(entry, {
          outcome: "ineligible_type",
          confidence: null,
          path: null,
          warning: null,
          suggestedClaimKey: null,
          reviewable: false,
          supportEvidence: [],
          rationale: "entry type is not eligible for claim-key extraction"
        });
        continue;
      }
      stageRequests.push(toStageRequest(entry));
    }
    const stageDecisions = await runStage(stageRequests, "primary", completedPrimaryEntries, totalEligibleEntries, (completed) => {
      completedPrimaryEntries = completed;
    });
    for (const { entry, decision } of stageDecisions) {
      if (!commitDecision(entry, decision)) {
        retryEntries.push(entry);
      }
    }
  }

  // A retry is only attempted when the primary pass accepted at least one key.
  if (retryEntries.length > 0 && extractedEntries.size > 0) {
    const retryEligibleEntries = retryEntries.filter((entry) => !entry.claim_key);
    const totalRetryEntries = retryEligibleEntries.length;
    let completedRetryEntries = 0;
    for (let stageStart = 0; stageStart < retryEligibleEntries.length; stageStart += stageSize) {
      const stageRequests = retryEligibleEntries.slice(stageStart, stageStart + stageSize).map((entry) => toStageRequest(entry));
      const stageDecisions = await runStage(stageRequests, "retry", completedRetryEntries, totalRetryEntries, (completed) => {
        completedRetryEntries = completed;
      });
      for (const { entry, decision } of stageDecisions) {
        commitDecision(entry, decision);
      }
    }
  }

  for (const result of results) {
    for (const entry of result.entries) {
      const diagnostic = diagnostics.get(entry);
      if (diagnostic) {
        onDiagnostic?.(entry, diagnostic);
      }
    }
  }
  return extractedEntries;
}
|
|
1836
|
+
/**
 * Runs all requests of one stage concurrently and reports progress as each
 * request settles.
 *
 * @param {{ entry: object, hintSnapshot: object }[]} stageRequests
 * @param {object} llm
 * @param {object} config
 * @param {((warning: string) => void) | undefined} onWarning
 * @param {number} initialCompletedEntries - Count already completed before this stage.
 * @param {number} totalEntries - Total passed through unchanged to `onProgress`.
 * @param {(completedEntries: number, totalEntries: number) => void} onProgress - Required;
 *   called once per finished request with the running count.
 * @returns {Promise<{ entry: object, decision: object }[]>} Decisions in request order.
 */
async function executeClaimExtractionStageRequests(stageRequests, llm, config, onWarning, initialCompletedEntries, totalEntries, onProgress) {
  let completedEntries = initialCompletedEntries;
  const runRequest = async ({ entry, hintSnapshot }) => {
    const decision = await extractBatchClaimKeyDecision(entry, llm, config, hintSnapshot, onWarning);
    completedEntries += 1;
    onProgress(completedEntries, totalEntries);
    return { entry, decision };
  };
  return Promise.all(stageRequests.map((request) => runRequest(request)));
}
|
|
1850
|
+
/**
 * Validates a requested stage size.
 *
 * @param {unknown} value - Requested concurrency.
 * @returns {number} `value` when it is a positive integer, otherwise `10`.
 */
function normalizeClaimExtractionConcurrency(value) {
  const isPositiveInteger = Number.isInteger(value) && value > 0;
  return isPositiveInteger ? value : 10;
}
|
|
1856
|
+
/**
 * Captures the hints one entry's extraction should see.
 *
 * `supportClaimKeys` is copied so keys recorded later do not leak into an
 * already-built snapshot; `entityPrefixStats` is passed by reference.
 *
 * @param {{ supportClaimKeys: Iterable<string>, entityPrefixStats: unknown }} hintState
 * @param {object} entry
 * @returns {{ hints: object, supportClaimKeys: string[], entityPrefixStats: unknown }}
 */
function buildClaimExtractionHintSnapshot(hintState, entry) {
  const hints = buildEntryHints(hintState, entry);
  const supportClaimKeys = [...hintState.supportClaimKeys];
  const entityPrefixStats = hintState.entityPrefixStats;
  return { hints, supportClaimKeys, entityPrefixStats };
}
|
|
1863
|
+
/**
 * Batch-safe wrapper around `extractClaimKeyDecision`: it never rejects, so
 * one failing entry cannot abort a whole stage.
 *
 * Only the extraction-relevant fields of the entry are forwarded. When the
 * underlying call throws, the error detail is reported through `onWarning`
 * (it used to be discarded) and a generic `extraction_failure` decision is
 * returned.
 *
 * @param {{ type: string, subject: string, content: string, tags?: string[], source_context?: string }} entry
 * @param {object} llm
 * @param {object} config
 * @param {{ hints: object, supportClaimKeys: string[], entityPrefixStats: unknown }} hintSnapshot
 * @param {(warning: string) => void} [onWarning]
 * @returns {Promise<{ result: object | null, diagnostic: object }>}
 */
async function extractBatchClaimKeyDecision(entry, llm, config, hintSnapshot, onWarning) {
  try {
    return await extractClaimKeyDecision(
      {
        type: entry.type,
        subject: entry.subject,
        content: entry.content,
        tags: entry.tags,
        source_context: entry.source_context
      },
      llm,
      config,
      {
        hints: hintSnapshot.hints,
        onWarning,
        supportClaimKeys: hintSnapshot.supportClaimKeys,
        entityPrefixStats: hintSnapshot.entityPrefixStats
      }
    );
  } catch (error) {
    const detail = error instanceof Error ? error.message : String(error);
    try {
      onWarning?.(`Claim extraction failed unexpectedly for "${entry?.subject}": ${detail}`);
    } catch {
      // The warning sink may itself be what threw; reporting stays best-effort
      // so this wrapper keeps its never-rejects contract.
    }
    return {
      result: null,
      diagnostic: {
        outcome: "extraction_failure",
        confidence: null,
        path: null,
        warning: "claim extraction failed unexpectedly",
        suggestedClaimKey: null,
        reviewable: false,
        supportEvidence: [],
        rationale: "claim extraction failed unexpectedly"
      }
    };
  }
}
|
|
1898
|
+
/**
 * Builds the system prompt for claim-key extraction.
 *
 * The prompt is a fixed set of rules and examples followed by the per-entry
 * hint lines and the JSON response contract. In `"json_retry"` mode four extra
 * lines tell the model its previous answer was not valid JSON.
 *
 * @param {{
 *   userEntity?: string,
 *   projectEntity?: string,
 *   tags: string[],
 *   sourceContext?: string,
 *   entityHints: string[],
 *   claimKeyExamples: string[]
 * }} hints - Normalized extraction hints.
 * @param {string} promptMode - `"json_retry"` adds the retry instructions; any other value does not.
 * @returns {string} Prompt lines joined with `"\n"`.
 */
function buildClaimExtractionSystemPrompt(hints, promptMode) {
  const listOrNone = (items) => (items.length > 0 ? items.join(", ") : "(none)");

  const metadataHints = [];
  if (hints.userEntity) {
    metadataHints.push(`user_id=${hints.userEntity}`);
  }
  if (hints.projectEntity) {
    metadataHints.push(`project=${hints.projectEntity}`);
  }

  const groundingHints = [];
  if (hints.tags.length > 0) {
    groundingHints.push(`tags=${hints.tags.join(", ")}`);
  }
  if (hints.sourceContext) {
    groundingHints.push(`source_context=${hints.sourceContext}`);
  }

  const introduction = [
    "You are a knowledge entry classifier. Extract one stable claim key for a durable knowledge entry.",
    "A claim key names the durable slot this entry updates: entity/attribute in lowercase snake_case.",
    "The goal is stable slot naming, not a paraphrase of the current value."
  ];

  const stabilityRules = [
    "Stability rules:",
    "- Prefer stable slot names over transient wording.",
    "- Choose attribute names that still make sense if the value changes.",
    "- Prefer short noun-like slot names over sentence-like attribute phrases.",
    "- When a candidate sounds like a rule or explanation sentence, compress it into the reusable slot it governs.",
    "- Prefer concrete entities over pronouns, deictic phrases, or self-referential placeholders.",
    "- Reuse an existing entity or full claim-key example when it clearly matches the same slot.",
    "- Stay domain-general. The same rules apply to people, devices, services, projects, places, organizations, products, datasets, policies, and preferences.",
    "- If the entry states a durable rule, default, workflow, guardrail, source-of-truth rule, architecture boundary, or process constraint plus rationale, extract the primary durable slot rather than the supporting rationale.",
    "- Do not return no_claim just because the entry explains why the rule exists. The durable policy or system slot is usually still the target.",
    "- Avoid full action clauses like requires_x_to_y, preserves_x_across_y, or x_precedes_y when a shorter stable slot such as trigger_condition, context_preservation, source_of_truth, or handoff_order would carry the same durable meaning."
  ];

  const noClaimRules = [
    "Return no_claim when:",
    "- The entry is narrative, multi-fact, or mostly a story about what happened.",
    "- The entry is an event or milestone without one continuing slot.",
    "- The entity is ambiguous or can only be named with a pronoun or vague placeholder.",
    "- The entry does not express one durable property, preference, decision, configuration, relationship, or other stable slot.",
    "- When unsure, prefer no_claim over inventing a weak key."
  ];

  const positiveExamples = [
    "Positive examples:",
    `- "Jim's timezone is America/Chicago." -> jim/timezone`,
    '- "Jim prefers oat milk in coffee." -> jim/coffee_preference',
    '- "Pixel 8 is set to dark mode." -> pixel_8/theme_mode',
    '- "Postgres max_connections is 200." -> postgres/max_connections',
    '- "Agenr defaults to gpt-5.4-mini." -> agenr/default_model',
    '- "Mac mini updates should stay manual so debugging stays predictable." -> mac_mini/manual_update_policy',
    '- "Use the warehouse inventory sheet as the source of truth for stock counts." -> stock_counts/source_of_truth',
    '- "The repo workflow is defined by AGENTS.md, even when older notes disagree." -> repo_workflow/source_of_truth',
    '- "Agenr keeps pure logic in src/core and adapters outside it so future hosts can plug in cleanly." -> agenr/core_adapter_boundary',
    '- "The before-prompt-build hook only triggers after a real agent turn or message." -> before_prompt_build_hook/trigger_condition',
    '- "Durable memory preserves context across sessions." -> durable_memory/context_preservation',
    '- "SQLite in this environment supports window functions." -> sqlite/window_function_support',
    '- "Meeting-recorder transcripts need manual cleanup before durable ingest." -> meeting_recorder/transcript_cleanup_workflow',
    '- "Reflection synthesis can hallucinate when it summarizes from partial notes." -> reflection_synthesis/hallucination_risk'
  ];

  const negativeExamples = [
    "Negative examples:",
    "- Bad: jim/america_chicago -> Good: jim/timezone",
    "- Bad: project_x/details -> Good: project_x/deploy_strategy",
    "- Bad: we/deployment_process -> Good: platform_team/deploy_strategy",
    "- Bad: jim/oat_milk -> Good: jim/coffee_preference",
    "- Bad: release_notes/because_rollbacks_are_hard -> Good: release_process/source_of_truth",
    "- Bad: openclaw/requires_real_agent_turn_or_message_to_trigger -> Good: openclaw/trigger_condition",
    "- Bad: session_continuity/durable_memory_preserves_context_across_sessions -> Good: session_continuity/context_preservation",
    "- Bad: incident_story/we_spent_two_hours_debugging -> Good: no_claim"
  ];

  const fieldRules = [
    "Field rules:",
    "- entity: the main concrete thing being described. It can be a person, device, service, product, organization, workflow area, or other durable system/process anchor.",
    "- attribute: the narrow stable slot on that entity. For policy/process entries, name the governing slot such as source_of_truth, default_mode, update_policy, architecture_boundary, deploy_strategy, or escalation_workflow.",
    "- Confidence: 0.0 to 1.0. Use 0.9+ only when the slot is unambiguous and durable."
  ];

  const hintLines = [
    `Known entity hints: ${listOrNone(hints.entityHints)}`,
    `Known claim-key examples: ${listOrNone(hints.claimKeyExamples)}`,
    `Current entry metadata hints: ${listOrNone(metadataHints)}`,
    `Current entry grounding clues: ${listOrNone(groundingHints)}`,
    'If project metadata is present, it may resolve phrases like "the project" when that mapping is obvious.',
    'If user metadata is present, it may resolve phrases like "the user", "I", or "me" when that mapping is obvious.',
    "Tags and source_context are local grounding clues, not proof. Use them to pick the right durable slot only when the entry content already supports that slot."
  ];

  const retryInstructions = [];
  if (promptMode === "json_retry") {
    retryInstructions.push(
      "",
      "Your previous answer was invalid JSON.",
      "Reply with exactly one JSON object and nothing else.",
      "Do not use markdown fences, commentary, or trailing text."
    );
  }

  // Sections are separated by a single blank line.
  return [
    ...introduction,
    "",
    ...stabilityRules,
    "",
    ...noClaimRules,
    "",
    ...positiveExamples,
    "",
    ...negativeExamples,
    "",
    ...fieldRules,
    "",
    ...hintLines,
    ...retryInstructions,
    "",
    'Respond with JSON: { "entity": string, "attribute": string, "confidence": number, "no_claim"?: boolean }'
  ].join("\n");
}
|
|
1979
|
+
/**
 * Renders the user-side prompt for one entry as three labelled lines:
 * entry type, subject, and content.
 *
 * @param {{type: unknown, subject: unknown, content: unknown}} entry
 * @returns {string} The three lines joined with "\n".
 */
function buildClaimExtractionUserPrompt(entry) {
  return `Entry type: ${entry.type}\nSubject: ${entry.subject}\nContent: ${entry.content}`;
}
|
|
1982
|
+
/**
 * Asks the LLM for a claim-key extraction, retrying once with the
 * "json_retry" system prompt when the first attempt fails with an error that
 * isMalformedJsonError recognises. Any other error from the first attempt is
 * rethrown; errors from the retry propagate to the caller.
 *
 * @param {object} entry - Entry used to build the user prompt.
 * @param {object} hints - Hints passed to the system-prompt builder.
 * @param {{completeJson: Function}} llm - Client exposing completeJson(systemPrompt, userPrompt).
 * @returns {Promise<{path: string, response: unknown}>} path is "model" or "json_retry".
 */
async function attemptClaimExtraction(entry, hints, llm) {
  const userPrompt = buildClaimExtractionUserPrompt(entry);
  const requestVia = async (path, promptMode) => ({
    path,
    response: await llm.completeJson(buildClaimExtractionSystemPrompt(hints, promptMode), userPrompt)
  });

  try {
    return await requestVia("model", "standard");
  } catch (error) {
    // Only malformed-JSON failures earn a second attempt.
    if (!isMalformedJsonError(error)) {
      throw error;
    }
  }
  return requestVia("json_retry", "json_retry");
}
|
|
1999
|
+
/**
 * Turns a raw model response into a validated claim-key candidate.
 *
 * The entity and attribute are normalised, joined as "entity/attribute",
 * then passed through normalizeClaimKey, compactClaimKey and
 * validateExtractedClaimKey in that order. The first stage that fails emits a
 * warning through onWarning (when provided) and yields null.
 *
 * @param {{subject: unknown}} entry - Only `subject` is read, for warning text.
 * @param {{confidence?: unknown, entity?: unknown, attribute?: unknown}} response
 * @param {object} hints - Normalised hints forwarded to normalizeEntity.
 * @param {((message: string) => void)=} onWarning
 * @returns {object|null} Candidate with claimKey, confidence, raw fields and
 *   compaction details, or null when the key was dropped or rejected.
 */
function buildClaimExtractionCandidate(entry, response, hints, onWarning) {
  const trimmedText = (field) => (typeof field === "string" ? field.trim() : "");

  const confidence = normalizeConfidence(response.confidence);
  const rawEntity = trimmedText(response.entity);
  const rawAttribute = trimmedText(response.attribute);

  const entity = normalizeEntity(rawEntity, hints);
  const attribute = normalizeClaimKeySegment(rawAttribute);

  const normalized = normalizeClaimKey(`${entity}/${attribute}`);
  if (!normalized.ok) {
    onWarning?.(`Claim extraction dropped claim key for "${entry.subject}": ${describeClaimKeyNormalizationFailure(normalized.reason)}.`);
    return null;
  }

  const compacted = compactClaimKey(normalized.value.claimKey);
  if (!compacted) {
    onWarning?.(`Claim extraction dropped claim key for "${entry.subject}": claim key could not be compacted safely.`);
    return null;
  }

  const validated = validateExtractedClaimKey(compacted);
  if (!validated.ok) {
    onWarning?.(
      `Claim extraction rejected "${validated.value.claimKey}" for "${entry.subject}": ${describeExtractedClaimKeyRejection(validated.reason, validated.value)}.`
    );
    return null;
  }

  return {
    claimKey: validated.value.claimKey,
    confidence,
    rawEntity,
    rawAttribute,
    compactedFrom: compacted.compactedFrom,
    compactionReason: compacted.reason
  };
}
|
|
2031
|
+
/**
 * Converts a candidate into an extraction result tagged with the path that
 * produced it. Compaction details are copied only when the candidate has a
 * truthy `compactedFrom`.
 *
 * @param {object} candidate
 * @param {string} path2 - Stored on the result as `path`.
 * @returns {object}
 */
function toClaimExtractionResult(candidate, path2) {
  const { claimKey, confidence, rawEntity, rawAttribute, compactedFrom } = candidate;
  const result = { claimKey, confidence, rawEntity, rawAttribute, path: path2 };
  if (compactedFrom) {
    result.compactedFrom = compactedFrom;
    result.compactionReason = candidate.compactionReason;
  }
  return result;
}
|
|
2044
|
+
/**
 * Builds the diagnostic record for an accepted extraction result: outcome
 * "accepted", no warning, not reviewable, and an empty support-evidence list.
 *
 * @param {{confidence: unknown, path: unknown, claimKey: unknown}} result
 * @param {string} rationale - Stored verbatim on the diagnostic.
 * @returns {object}
 */
function buildAcceptedDiagnostic(result, rationale) {
  const { confidence, path, claimKey: suggestedClaimKey } = result;
  return {
    outcome: "accepted",
    confidence,
    path,
    warning: null,
    suggestedClaimKey,
    reviewable: false,
    supportEvidence: [],
    rationale
  };
}
|
|
2056
|
+
/**
 * Decides what to do with a deterministically repaired claim key.
 *
 * - No singleton-alias candidate found: accept the repaired result as-is.
 * - Candidate flagged `canonicalReuseSafe`: rewrite the key onto the dominant
 *   entity prefix, record an acceptance rationale, and accept it.
 * - Otherwise: return no result and a reviewable "low_confidence_candidate"
 *   diagnostic that suggests the rewritten key.
 *
 * @param {object} repaired - Result produced by the deterministic repair path.
 * @param {Array<object>} entityPrefixStats
 * @returns {{result: object|null, diagnostic: object}}
 */
function finalizeDeterministicRepairDecision(repaired, entityPrefixStats) {
  const alias = findSingletonAliasReuseCandidate(repaired, entityPrefixStats);
  if (!alias) {
    return {
      result: repaired,
      diagnostic: buildAcceptedDiagnostic(repaired, "deterministic possessive-slot repair recovered the missing claim key")
    };
  }

  // Both remaining outcomes need the key rewritten onto the dominant prefix.
  const rewritten = rewriteClaimKeyEntityPrefix(repaired, alias.dominantEntityPrefix);

  if (!alias.canonicalReuseSafe) {
    return {
      result: null,
      diagnostic: {
        outcome: "low_confidence_candidate",
        confidence: repaired.confidence,
        path: repaired.path,
        warning: null,
        suggestedClaimKey: rewritten.claimKey,
        reviewable: true,
        supportEvidence: alias.evidence.map((evidence) => evidence.kind),
        rationale: `deterministic repair would create singleton alias "${alias.aliasEntityPrefix}" next to dominant trusted family "${alias.dominantEntityPrefix}", so the new namespace was staged for review`
      }
    };
  }

  rewritten.acceptanceRationale = `reused dominant entity family "${alias.dominantEntityPrefix}" instead of minting singleton alias "${alias.aliasEntityPrefix}"`;
  return {
    result: rewritten,
    diagnostic: buildAcceptedDiagnostic(
      rewritten,
      `deterministic repair reused dominant family "${alias.dominantEntityPrefix}" instead of new singleton alias "${alias.aliasEntityPrefix}"`
    )
  };
}
|
|
2090
|
+
/**
 * Looks for a singleton-alias candidate whose alias prefix equals the entity
 * prefix of the repaired claim key (the text before the first "/").
 *
 * Returns null without consulting the detector when the claim key is missing,
 * the stats list is missing or empty, or the entity prefix is empty.
 *
 * @param {{claimKey?: string}} repaired
 * @param {Array<object>=} entityPrefixStats
 * @returns {object|null}
 */
function findSingletonAliasReuseCandidate(repaired, entityPrefixStats) {
  const { claimKey } = repaired;
  if (!claimKey) {
    return null;
  }
  if (!entityPrefixStats || entityPrefixStats.length === 0) {
    return null;
  }

  const entityPrefix = claimKey.split("/", 1)[0] ?? "";
  if (!entityPrefix) {
    return null;
  }

  const candidates = detectClaimKeySingletonAliasCandidatesFromStats(
    summarizeAugmentedEntityPrefixStats(entityPrefixStats, entityPrefix)
  );
  const match = candidates.find((candidate) => candidate.aliasEntityPrefix === entityPrefix);
  return match ?? null;
}
|
|
2102
|
+
/**
 * Ensures the stats list contains a profile for `entityPrefix`.
 *
 * When a profile with that prefix already exists the input array is returned
 * unchanged (same reference). Otherwise a new array is returned with a
 * synthetic profile appended: one active, tentative, deterministic-repair
 * entry and zero for every other counter.
 *
 * @param {Array<{entityPrefix: string}>} entityPrefixStats
 * @param {string} entityPrefix
 * @returns {Array<object>}
 */
function summarizeAugmentedEntityPrefixStats(entityPrefixStats, entityPrefix) {
  const alreadyProfiled = entityPrefixStats.some((profile) => profile.entityPrefix === entityPrefix);
  if (alreadyProfiled) {
    return entityPrefixStats;
  }

  const syntheticProfile = {
    entityPrefix,
    activeEntryCount: 1,
    trustedEntryCount: 0,
    tentativeEntryCount: 1,
    unresolvedEntryCount: 0,
    deterministicRepairEntryCount: 1,
    manualEntryCount: 0,
    modelEntryCount: 0,
    jsonRetryEntryCount: 0,
    dreamingFamilyReuseDurableCount: 0
  };
  return [...entityPrefixStats, syntheticProfile];
}
|
|
2123
|
+
/**
 * Returns a copy of `result` whose claim key keeps its attribute segment but
 * uses `entityPrefix` as the entity. A result with no claim key is returned
 * unchanged (same reference).
 *
 * Only the second "/"-separated segment is kept as the attribute; any further
 * segments are dropped.
 *
 * @param {{claimKey?: string}} result
 * @param {string} entityPrefix
 * @returns {object}
 */
function rewriteClaimKeyEntityPrefix(result, entityPrefix) {
  const { claimKey } = result;
  if (!claimKey) {
    return result;
  }
  const attribute = claimKey.split("/", 2)[1] ?? "";
  return { ...result, claimKey: `${entityPrefix}/${attribute}` };
}
|
|
2134
|
+
/**
 * Produces a printable message for a thrown value: the `message` of an Error
 * instance, or the String() conversion of anything else.
 *
 * @param {unknown} error
 * @returns {string}
 */
function formatClaimExtractionError(error) {
  if (error instanceof Error) {
    return error.message;
  }
  return String(error);
}
|
|
2137
|
+
/**
 * Maps a support record's `autoApplyClass` to a human-readable phrase.
 * Unrecognised or missing classes yield "structural support".
 *
 * @param {{autoApplyClass?: string}} support
 * @returns {string}
 */
function describeSupportPromotionClass(support) {
  // A Map (not a plain object) so names like "constructor" cannot match
  // inherited properties.
  const labels = new Map([
    ["trusted_exact_reuse_grounded", "trusted exact-key reuse with local grounding"],
    ["trusted_family_template_grounded", "trusted family reuse plus grounded template support"],
    ["trusted_family_stable_slot", "trusted family reuse plus a stable compact slot"],
    ["trusted_family_grounded_alignment", "trusted family reuse plus grounded dual lexical alignment"]
  ]);
  return labels.get(support.autoApplyClass) ?? "structural support";
}
|
|
2151
|
+
/**
 * Tries to derive a claim key without the model by parsing a possessive
 * phrase ("X's Y") from the entry subject or, failing that, a possessive
 * statement from the entry content.
 *
 * The repair is abandoned (null) when nothing parses, the attribute fails
 * looksLikeDeterministicAttribute, or the resulting key fails normalisation
 * or validation.
 *
 * @param {{subject: string, content: string}} entry
 * @param {object} hints - Normalised hints forwarded to normalizeEntity.
 * @returns {object|null} Result with path "deterministic_repair", or null.
 */
function tryDeterministicClaimKeyRepair(entry, hints) {
  const parsed = parsePossessiveClaim(entry.subject) ?? parsePossessiveStatement(entry.content);
  if (!parsed) {
    return null;
  }

  const attribute = normalizeClaimKeySegment(parsed.attribute);
  if (!looksLikeDeterministicAttribute(attribute)) {
    return null;
  }

  const entity = normalizeEntity(parsed.entity, hints);
  const normalized = normalizeClaimKey(`${entity}/${attribute}`);
  if (!normalized.ok) {
    return null;
  }

  const validated = validateExtractedClaimKey(normalized.value);
  if (!validated.ok) {
    return null;
  }

  const { entity: rawEntity, attribute: rawAttribute } = parsed;
  return {
    claimKey: validated.value.claimKey,
    confidence: DEFAULT_REPAIR_CONFIDENCE,
    rawEntity,
    rawAttribute,
    path: "deterministic_repair"
  };
}
|
|
2177
|
+
/**
 * Loads entity hints, prompt claim-key examples, support claim keys and
 * entity-prefix stats from the database concurrently and assembles them into
 * a hint state. A lookup that rejects contributes an empty list instead of
 * failing the whole load.
 *
 * @param {object} db
 * @returns {Promise<object>} Hint state produced by createHintState.
 */
async function loadClaimExtractionHintState(db) {
  const outcomes = await Promise.allSettled([
    getEntityHints(db),
    getClaimKeyExamples(db, MAX_CLAIM_KEY_EXAMPLES),
    getClaimKeyExamples(db, MAX_SUPPORT_CLAIM_KEY_EXAMPLES),
    getClaimKeyEntityPrefixStats(db)
  ]);
  // Best-effort: a rejected lookup degrades to "no hints" for that slot.
  const [entityHints, claimKeyExamples, supportClaimKeys, entityPrefixStats] = outcomes.map(
    (outcome) => (outcome.status === "fulfilled" ? outcome.value : [])
  );
  return createHintState({ entityHints, claimKeyExamples, supportClaimKeys, entityPrefixStats });
}
|
|
2191
|
+
/**
 * Fetches claim-key examples from the database when it implements
 * `getClaimKeyExamples`; resolves to an empty list otherwise.
 *
 * @param {{getClaimKeyExamples?: Function}} db
 * @param {number} limit - Forwarded to db.getClaimKeyExamples.
 * @returns {Promise<unknown>} Whatever the database method yields, or [].
 */
async function getClaimKeyExamples(db, limit) {
  const supported = typeof db.getClaimKeyExamples === "function";
  return supported ? db.getClaimKeyExamples(limit) : [];
}
|
|
2197
|
+
/**
 * Fetches entity-prefix statistics from the database when it implements
 * `getClaimKeyEntityPrefixStats`; resolves to an empty list otherwise.
 *
 * @param {{getClaimKeyEntityPrefixStats?: Function}} db
 * @returns {Promise<unknown>} Whatever the database method yields, or [].
 */
async function getClaimKeyEntityPrefixStats(db) {
  const supported = typeof db.getClaimKeyEntityPrefixStats === "function";
  return supported ? db.getClaimKeyEntityPrefixStats() : [];
}
|
|
2203
|
+
/**
 * Builds the shared hint state from raw inputs.
 *
 * Claim-key examples and support claim keys are normalised first. Entity
 * hints are the explicit hints followed by the entity of every support claim
 * key, de-duplicated and capped at MAX_ENTITY_HINTS. Entity-prefix stats are
 * passed through unchanged.
 *
 * @param {{entityHints?: string[], claimKeyExamples?: string[],
 *          supportClaimKeys?: string[], entityPrefixStats?: object[]}} input
 * @returns {{entityHints: string[], claimKeyExamples: string[],
 *            supportClaimKeys: string[], entityPrefixStats: object[]}}
 */
function createHintState(input) {
  const claimKeyExamples = normalizeClaimKeyExamples(input.claimKeyExamples ?? []);
  const supportClaimKeys = normalizeSupportClaimKeys(input.supportClaimKeys ?? []);

  const explicitEntities = normalizeEntityHints(input.entityHints ?? []);
  const supportEntities = supportClaimKeys
    .map((claimKey) => normalizeClaimKey(claimKey))
    .filter((parsed) => parsed.ok)
    .map((parsed) => parsed.value.entity);
  const entityHints = limitUnique([...explicitEntities, ...supportEntities], MAX_ENTITY_HINTS);

  return {
    entityHints,
    claimKeyExamples,
    supportClaimKeys,
    entityPrefixStats: input.entityPrefixStats ?? []
  };
}
|
|
2223
|
+
/**
 * Combines the shared hint state with one entry's metadata. The entity hints
 * and claim-key examples are shallow-copied from the state; the entry's
 * `user_id`, `project`, `tags` and `source_context` are passed through under
 * camelCase names.
 *
 * @param {{entityHints: Iterable<string>, claimKeyExamples: Iterable<string>}} state
 * @param {{user_id?: unknown, project?: unknown, tags?: unknown, source_context?: unknown}} entry
 * @returns {object}
 */
function buildEntryHints(state, entry) {
  const entityHints = [...state.entityHints];
  const claimKeyExamples = [...state.claimKeyExamples];
  const { user_id: userId, project, tags, source_context: sourceContext } = entry;
  return { entityHints, claimKeyExamples, userId, project, tags, sourceContext };
}
|
|
2233
|
+
/**
 * Records a claim key in the hint state. The normalised key is moved to the
 * front of both the prompt examples and the support keys, and its entity to
 * the front of the entity hints, each capped at its own limit. Keys that fail
 * normalisation are ignored.
 *
 * Mutates `state` by reassigning its three list properties.
 *
 * @param {{claimKeyExamples: string[], supportClaimKeys: string[], entityHints: string[]}} state
 * @param {string} claimKey
 * @returns {void}
 */
function recordClaimKeyHint(state, claimKey) {
  const parsed = normalizeClaimKey(claimKey);
  if (parsed.ok) {
    const { claimKey: canonicalKey, entity } = parsed.value;
    state.claimKeyExamples = prependUnique(state.claimKeyExamples, canonicalKey, MAX_CLAIM_KEY_EXAMPLES);
    state.supportClaimKeys = prependUnique(state.supportClaimKeys, canonicalKey, MAX_SUPPORT_CLAIM_KEY_EXAMPLES);
    state.entityHints = prependUnique(state.entityHints, entity, MAX_ENTITY_HINTS);
  }
}
|
|
2242
|
+
/**
 * Normalises per-entry hints before they are used for prompting and entity
 * resolution.
 *
 * Entity hints are the explicit hints followed by the entity of each
 * normalised claim-key example, de-duplicated and capped at MAX_ENTITY_HINTS.
 * `userId` and `project` become `userEntity` and `projectEntity` via
 * normalizeMetadataEntity.
 *
 * @param {{entityHints?: string[], claimKeyExamples?: string[], userId?: unknown,
 *          project?: unknown, tags?: string[], sourceContext?: string}} hints
 * @returns {object}
 */
function normalizeClaimExtractionHints(hints) {
  const claimKeyExamples = normalizeClaimKeyExamples(hints.claimKeyExamples ?? []);

  const explicitEntities = normalizeEntityHints(hints.entityHints ?? []);
  const exampleEntities = claimKeyExamples
    .map((claimKey) => normalizeClaimKey(claimKey))
    .filter((parsed) => parsed.ok)
    .map((parsed) => parsed.value.entity);

  return {
    entityHints: limitUnique([...explicitEntities, ...exampleEntities], MAX_ENTITY_HINTS),
    claimKeyExamples,
    userEntity: normalizeMetadataEntity(hints.userId),
    projectEntity: normalizeMetadataEntity(hints.project),
    tags: normalizeHintTags(hints.tags ?? []),
    sourceContext: normalizeSourceContextHint(hints.sourceContext)
  };
}
|
|
2262
|
+
/**
 * Summarises an extraction attempt for preview: the outcome label, the
 * clamped confidence, the trimmed raw entity and attribute (empty string when
 * the response field is not a string), and the attempt path.
 *
 * @param {string} outcome
 * @param {{response: {confidence?: unknown, entity?: unknown, attribute?: unknown}, path: string}} attempt
 * @returns {object}
 */
function buildPreviewOutcome(outcome, attempt) {
  const { response, path } = attempt;
  const trimmedText = (field) => (typeof field === "string" ? field.trim() : "");
  return {
    outcome,
    confidence: normalizeConfidence(response.confidence),
    rawEntity: trimmedText(response.entity),
    rawAttribute: trimmedText(response.attribute),
    path
  };
}
|
|
2271
|
+
/**
 * Clamps a confidence value to the range [0, 1]. Anything that is not a
 * finite number (including NaN, ±Infinity and numeric strings) becomes 0.
 *
 * @param {unknown} value
 * @returns {number}
 */
function normalizeConfidence(value) {
  const isUsableNumber = typeof value === "number" && Number.isFinite(value);
  return isUsableNumber ? Math.max(0, Math.min(1, value)) : 0;
}
|
|
2277
|
+
/**
 * Normalises an entity name and, when it is self-referential, tries to
 * resolve it to a concrete entity from the hints.
 *
 * Resolution order for self-referential values:
 *   1. user-referential value with a known user entity -> the user entity;
 *   2. project-referential value with a known project entity -> the project entity;
 *   3. exactly one distinct concrete candidate among project, user and entity
 *      hints -> that candidate;
 *   4. exactly one entity hint -> that hint;
 *   5. otherwise the normalised value itself.
 *
 * @param {string} value
 * @param {{userEntity?: string, projectEntity?: string, entityHints: string[]}} hints
 * @returns {string} Normalised entity, or "" when the value normalises to nothing.
 */
function normalizeEntity(value, hints) {
  const segment = normalizeClaimKeySegment(value);
  if (segment.length === 0) {
    return "";
  }
  if (!SELF_REFERENTIAL_ENTITIES.has(segment)) {
    return segment;
  }

  const { userEntity, projectEntity, entityHints } = hints;
  if (USER_REFERENTIAL_ENTITIES.has(segment) && userEntity) {
    return userEntity;
  }
  if (PROJECT_REFERENTIAL_ENTITIES.has(segment) && projectEntity) {
    return projectEntity;
  }

  const isNonEmptyString = (candidate) => typeof candidate === "string" && candidate.length > 0;
  const concreteCandidates = limitUnique(
    [projectEntity, userEntity, ...entityHints].filter(isNonEmptyString),
    MAX_ENTITY_HINTS
  );
  if (concreteCandidates.length === 1) {
    return concreteCandidates[0] ?? segment;
  }
  if (entityHints.length === 1) {
    return entityHints[0] ?? segment;
  }
  return segment;
}
|
|
2305
|
+
/**
 * Normalises raw entity hints into claim-key segments, discards empty and
 * self-referential ones, then de-duplicates and caps the list at
 * MAX_ENTITY_HINTS.
 *
 * @param {string[]} entityHints
 * @returns {string[]}
 */
function normalizeEntityHints(entityHints) {
  const usable = entityHints.flatMap((rawHint) => {
    const segment = normalizeClaimKeySegment(rawHint);
    const keep = segment.length > 0 && !SELF_REFERENTIAL_ENTITIES.has(segment);
    return keep ? [segment] : [];
  });
  return limitUnique(usable, MAX_ENTITY_HINTS);
}
|
|
2311
|
+
/**
 * Normalises prompt claim-key examples: keys that fail normalizeClaimKey are
 * dropped, the rest are de-duplicated and capped at MAX_CLAIM_KEY_EXAMPLES.
 *
 * @param {string[]} claimKeyExamples
 * @returns {string[]}
 */
function normalizeClaimKeyExamples(claimKeyExamples) {
  const canonicalKeys = claimKeyExamples
    .map((example) => normalizeClaimKey(example))
    .filter((parsed) => parsed.ok)
    .map((parsed) => parsed.value.claimKey);
  return limitUnique(canonicalKeys, MAX_CLAIM_KEY_EXAMPLES);
}
|
|
2320
|
+
/**
 * Normalises support claim keys: keys that fail normalizeClaimKey are
 * dropped, the rest are de-duplicated and capped at
 * MAX_SUPPORT_CLAIM_KEY_EXAMPLES.
 *
 * @param {string[]} claimKeys
 * @returns {string[]}
 */
function normalizeSupportClaimKeys(claimKeys) {
  const canonicalKeys = claimKeys
    .map((claimKey) => normalizeClaimKey(claimKey))
    .filter((parsed) => parsed.ok)
    .map((parsed) => parsed.value.claimKey);
  return limitUnique(canonicalKeys, MAX_SUPPORT_CLAIM_KEY_EXAMPLES);
}
|
|
2329
|
+
/**
 * Normalises a metadata value (such as a user id or project name) into an
 * entity segment. Yields undefined when the value is not a string,
 * normalises to nothing, is self-referential, or contains no lowercase ASCII
 * letter after normalisation.
 *
 * @param {unknown} value
 * @returns {string|undefined}
 */
function normalizeMetadataEntity(value) {
  if (typeof value !== "string") {
    return undefined;
  }
  const segment = normalizeClaimKeySegment(value);
  const usable = segment.length > 0 && !SELF_REFERENTIAL_ENTITIES.has(segment) && /[a-z]/u.test(segment);
  return usable ? segment : undefined;
}
|
|
2339
|
+
/**
 * Normalises tags into claim-key segments, drops those that normalise to
 * nothing, then de-duplicates and keeps at most 8.
 *
 * @param {string[]} tags
 * @returns {string[]}
 */
function normalizeHintTags(tags) {
  const segments = tags.flatMap((tag) => {
    const segment = normalizeClaimKeySegment(tag);
    return segment.length > 0 ? [segment] : [];
  });
  return limitUnique(segments, 8);
}
|
|
2345
|
+
/**
 * Normalises a source-context hint for inclusion in a prompt.
 *
 * The value is trimmed; non-strings and blank strings yield undefined.
 * Strings longer than 160 UTF-16 code units are cut to at most 157 units,
 * right-trimmed, and suffixed with "...".
 *
 * @param {unknown} value
 * @returns {string|undefined}
 */
function normalizeSourceContextHint(value) {
  // Guard on type: `value?.trim()` only protects against null/undefined and
  // would throw for numbers, objects, and other non-string metadata.
  if (typeof value !== "string") {
    return undefined;
  }
  const trimmed = value.trim();
  if (trimmed.length === 0) {
    return undefined;
  }
  if (trimmed.length <= 160) {
    return trimmed;
  }

  let head = trimmed.slice(0, 157);
  // Cutting by code unit can split a surrogate pair; drop a dangling high
  // surrogate so the result stays well-formed.
  const lastUnit = head.charCodeAt(head.length - 1);
  if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
    head = head.slice(0, -1);
  }
  return `${head.trimEnd()}...`;
}
|
|
2352
|
+
/**
 * Heuristically decides whether an error describes malformed JSON by matching
 * its message (or its String() form for non-Error values) against a
 * case-insensitive pattern of common JSON parser phrases.
 *
 * @param {unknown} error
 * @returns {boolean}
 */
function isMalformedJsonError(error) {
  const malformedJsonPattern = /json|unexpected token|unexpected end|unexpected non-whitespace|unterminated|position \d+/iu;
  const text = error instanceof Error ? error.message : String(error);
  return malformedJsonPattern.test(text);
}
|
|
2356
|
+
/**
 * Parses a subject that is entirely of the form "<entity>'s <attribute>"
 * (straight or curly apostrophe, no sentence punctuation or newline in either
 * part). Trailing punctuation is stripped from both captured parts.
 *
 * @param {string} subject
 * @returns {{entity: string, attribute: string}|null} null when the subject does not match.
 */
function parsePossessiveClaim(subject) {
  const groups = /^\s*(?<entity>[^.!?\n]+?)[’']s\s+(?<attribute>[^.!?\n]+?)\s*$/iu.exec(subject)?.groups;
  if (!groups) {
    return null;
  }
  const { entity, attribute } = groups;
  return {
    entity: stripTrailingPunctuation(entity),
    attribute: stripTrailingPunctuation(attribute)
  };
}
|
|
2366
|
+
/**
 * Parses content that begins with "<entity>'s <attribute> is|are|was|were"
 * (straight or curly apostrophe, no sentence punctuation or newline in either
 * part). Trailing punctuation is stripped from both captured parts.
 *
 * @param {string} content
 * @returns {{entity: string, attribute: string}|null} null when the content does not match.
 */
function parsePossessiveStatement(content) {
  const groups = /^\s*(?<entity>[^.!?\n]+?)[’']s\s+(?<attribute>[^.!?\n]+?)\s+(?:is|are|was|were)\b/iu.exec(content)?.groups;
  if (!groups) {
    return null;
  }
  const { entity, attribute } = groups;
  return {
    entity: stripTrailingPunctuation(entity),
    attribute: stripTrailingPunctuation(attribute)
  };
}
|
|
2376
|
+
/**
 * Trims the value and removes any trailing run of whitespace, quotes
 * (straight or curly) and the punctuation marks . , : ; ! ?
 *
 * @param {string} value
 * @returns {string}
 */
function stripTrailingPunctuation(value) {
  const trimmed = value.trim();
  const withoutTail = trimmed.replace(/[\s"'“”‘’.,:;!?]+$/gu, "");
  return withoutTail.trim();
}
|
|
2379
|
+
/**
 * Checks whether a normalised attribute is short enough (one to four
 * underscore-separated words) and ends in a word listed in
 * DETERMINISTIC_ATTRIBUTE_HEADS.
 *
 * @param {string} attribute
 * @returns {boolean}
 */
function looksLikeDeterministicAttribute(attribute) {
  const words = attribute.split("_").filter((word) => word.length > 0);
  const wordCount = words.length;
  if (wordCount < 1 || wordCount > 4) {
    return false;
  }
  // The last word is what gets matched against the known attribute heads.
  const lastWord = words[wordCount - 1];
  return typeof lastWord === "string" && DETERMINISTIC_ATTRIBUTE_HEADS.has(lastWord);
}
|
|
2387
|
+
/**
 * Puts `value` at the front of `values`, then de-duplicates and caps the list
 * via limitUnique.
 *
 * @param {Iterable<string>} values
 * @param {string} value
 * @param {number} limit
 * @returns {string[]}
 */
function prependUnique(values, value, limit) {
  const candidates = [value, ...values];
  return limitUnique(candidates, limit);
}
|
|
2390
|
+
/**
 * Drops entries whose `length` is not positive, removes duplicates while
 * keeping first-occurrence order, and returns at most `limit` entries.
 *
 * @param {string[]} values
 * @param {number} limit
 * @returns {string[]}
 */
function limitUnique(values, limit) {
  const distinct = new Set(values.filter(({ length }) => length > 0));
  return [...distinct].slice(0, limit);
}
|
|
2393
|
+
|
|
2394
|
+
export {
|
|
2395
|
+
detectClaimKeyEntityFamilyCandidates,
|
|
2396
|
+
detectClaimKeySingletonAliasCandidates,
|
|
2397
|
+
buildTrustedClaimKeySupportSeed,
|
|
2398
|
+
evaluateClaimKeySupport,
|
|
2399
|
+
evaluateClaimKeyCompactness,
|
|
2400
|
+
normalizeGroundingTags,
|
|
2401
|
+
tokenizeGroundingText,
|
|
2402
|
+
buildDurableLocalLexicalTokens,
|
|
2403
|
+
applyClaimExtractionResultToEntry,
|
|
2404
|
+
previewClaimKeyExtraction,
|
|
2405
|
+
runBatchClaimExtraction,
|
|
2406
|
+
validateSupersessionRules,
|
|
2407
|
+
describeSupersessionRuleFailure,
|
|
2408
|
+
computeContentHash,
|
|
2409
|
+
computeNormContentHash,
|
|
2410
|
+
resolveDurableProjectScope,
|
|
2411
|
+
tryAcquireDreamingRunLock,
|
|
2412
|
+
withDreamingRunLock,
|
|
2413
|
+
withHeldDreamingRunLock,
|
|
2414
|
+
withEpisodeWriteGuard,
|
|
2415
|
+
isEpisodeWriteInProgress
|
|
2416
|
+
};
|