agenr 0.9.93 → 0.9.95
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +26 -0
- package/dist/{chunk-LQRZWCMN.js → chunk-3SQJQUIE.js} +399 -23
- package/dist/{chunk-V4TO364M.js → chunk-UUCGEA3M.js} +394 -32
- package/dist/{chunk-JTQXI332.js → chunk-VW6ZN3RY.js} +10892 -5754
- package/dist/cli-main.d.ts +1 -1
- package/dist/cli-main.js +859 -218
- package/dist/{maintain-NBNOASPQ.js → maintain-WL7KST4P.js} +159 -19
- package/dist/openclaw-plugin/index.d.ts +1 -1
- package/dist/openclaw-plugin/index.js +2 -2
- package/dist/{stream-CgMjWCQQ.d.ts → stream-BD70Hhub.d.ts} +3 -0
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,31 @@
|
|
|
1
1
|
# Changelog
|
|
2
2
|
|
|
3
|
+
## [0.9.95] - 2026-03-15
|
|
4
|
+
|
|
5
|
+
### Added
|
|
6
|
+
|
|
7
|
+
- Added a full Phase 3 structured-memory spine for canonical claims, including support accumulation, explicit `current_state` and `prior_state` anchors, and explicit committed `state_transition` anchors for replacement/migration changes.
|
|
8
|
+
- Added bounded and then broader structured retrieval support over the new state fields, including direct structured lookup, subject-key recovery from weak footholds, hybrid candidate assembly, and structure-aware embedding composition so canonical anchors can win semantically as well as lexically.
|
|
9
|
+
- Added pre-rebuild alignment work for consolidation, maintenance, conflicts, and runtime prompt guidance so rebuild-time operation follows the new canonical brain model instead of the legacy blob-brain assumptions.
|
|
10
|
+
|
|
11
|
+
### Fixed
|
|
12
|
+
|
|
13
|
+
- Fixed live recall crash behavior around retired related-entry hydration by making relation-neighbor expansion safe in the presence of retired superseded source rows, then removed active-to-retired `relations.supersedes` residue as a write-time invariant rather than leaving retrieval to tolerate it forever.
|
|
14
|
+
- Fixed consolidation and maintenance behavior to preserve Phase 3 canonical invariants: same-identity structured claims now reinforce deterministically instead of synthesizing blob merges, current/prior/transition anchors remain distinct, canonical structured claims are protected from generic retirement/reflection demotion, and superseded cleanup distinguishes true supersession from expiry residue.
|
|
15
|
+
- Fixed prompt/runtime alignment so extraction, dedup, consolidation, reflection, and rebuild-oriented prompt paths now encode the new structured-claim, role-aware, rebuild-required model instead of lagging behind older assumptions.
|
|
16
|
+
|
|
17
|
+
## [0.9.94] - 2026-03-14
|
|
18
|
+
|
|
19
|
+
### Added
|
|
20
|
+
|
|
21
|
+
- Added a Phase 2 query-normalization and retrieval-mode layer that interprets raw recall phrasing together with explicit `agenr_recall` parameters, making current-state, historical, decision, preference/policy/workflow, change/transition, and abstain-or-narrow routing explicit and traceable.
|
|
22
|
+
- Added noisy conversational/currentness/transition/param-interaction eval coverage plus surfaced-set diagnostics that measure what users actually see, including output-alignment reasons and bounded transition/currentness support shaping.
|
|
23
|
+
|
|
24
|
+
### Fixed
|
|
25
|
+
|
|
26
|
+
- Fixed recall precision and surface behavior across the full Phase 1/Phase 2 brain-quality tranche: adjacent-wrong suppression, lexical candidate survival, generic lookup query-shape handling, selected-support-only surfacing, abstain-or-narrow trust behavior, historical clear-winner qualification, and currentness/transition before-after pair handling.
|
|
27
|
+
- Fixed explicit migration/replacement transition queries in both isolated eval and realistic snapshot/live-brain paths so prior-state rows remain available for bounded before/after answers without reopening broad stale-history retrieval.
|
|
28
|
+
|
|
3
29
|
## [0.9.93] - 2026-03-13
|
|
4
30
|
|
|
5
31
|
### Added
|
|
@@ -1,12 +1,16 @@
|
|
|
1
1
|
import {
|
|
2
2
|
CREATE_IDX_ENTRIES_EMBEDDING_SQL,
|
|
3
3
|
KNOWLEDGE_TYPES,
|
|
4
|
+
STATE_ANCHOR_PREDICATE,
|
|
5
|
+
STATE_TRANSITION_PREDICATE,
|
|
4
6
|
buildProjectFilter,
|
|
7
|
+
buildStructuredClaimIdentity,
|
|
8
|
+
classifyStructuredConflictPair,
|
|
5
9
|
coerceExpiry,
|
|
6
10
|
composeEmbeddingText,
|
|
7
11
|
createLogger,
|
|
8
|
-
createRelation,
|
|
9
12
|
deleteCoRecallEdgesForEntryIds,
|
|
13
|
+
deleteSupersedesRelationsToInactiveTargets,
|
|
10
14
|
embed,
|
|
11
15
|
getClusterSummary,
|
|
12
16
|
hashEntrySourceContent,
|
|
@@ -17,13 +21,15 @@ import {
|
|
|
17
21
|
normalizeExpiry,
|
|
18
22
|
normalizePositiveInt,
|
|
19
23
|
recency,
|
|
24
|
+
recordEntrySupport,
|
|
20
25
|
resolveConflictLog,
|
|
21
26
|
resolveHigherExpiry,
|
|
22
27
|
resolveHighestExpiry,
|
|
23
28
|
runSimpleStream,
|
|
29
|
+
structuredClaimIdentityEquals,
|
|
24
30
|
toErrorMessage,
|
|
25
31
|
walCheckpoint
|
|
26
|
-
} from "./chunk-
|
|
32
|
+
} from "./chunk-VW6ZN3RY.js";
|
|
27
33
|
import {
|
|
28
34
|
MILLISECONDS_PER_DAY,
|
|
29
35
|
parseDaysBetween,
|
|
@@ -76,6 +82,83 @@ async function rebuildVectorIndex(db, options) {
|
|
|
76
82
|
return { embeddingCount, durationMs };
|
|
77
83
|
}
|
|
78
84
|
|
|
85
|
+
// src/consolidate/structured.ts
|
|
86
|
+
/**
 * Produces the comparison form of a subject string: surrounding whitespace
 * removed, then lower-cased.
 *
 * @param {string} value - Raw subject text.
 * @returns {string} Trimmed, lower-cased subject.
 */
function normalizeSubject(value) {
  const trimmed = value.trim();
  return trimmed.toLowerCase();
}
|
|
89
|
+
/**
 * Projects a consolidation entry onto the shape passed to the structured
 * claim identity builder.
 *
 * Required fields are copied as-is; `importance` falls back to 5, `tags` to an
 * empty array, and the source file/context to "agenr"/"consolidate" when they
 * are null or undefined. Optional structured fields are copied only when
 * truthy, and `claimConfidence` only when it is a number.
 *
 * @param {object} entry - Consolidation entry record.
 * @returns {object} Identity-shaped entry.
 */
function toIdentityEntry(entry) {
  const identityEntry = {
    type: entry.type,
    subject: entry.subject,
    content: entry.content,
    importance: entry.importance ?? 5,
    expiry: entry.expiry,
    tags: entry.tags ?? [],
    source: {
      file: entry.sourceFile ?? "agenr",
      context: entry.sourceContext ?? "consolidate"
    }
  };
  // Listed in the order the keys must appear on the result.
  const optionalFields = [
    "subjectEntity",
    "subjectAttribute",
    "subjectKey",
    "claimPredicate",
    "claimObject",
    "claimRole"
  ];
  for (const field of optionalFields) {
    if (entry[field]) {
      identityEntry[field] = entry[field];
    }
  }
  // A confidence of 0 is meaningful, so test the type rather than truthiness.
  if (typeof entry.claimConfidence === "number") {
    identityEntry.claimConfidence = entry.claimConfidence;
  }
  return identityEntry;
}
|
|
110
|
+
/**
 * Reports whether a structured identity uses one of the two protected
 * predicates (state anchor or state transition).
 *
 * @param {{ predicate: unknown }} identity - Structured claim identity.
 * @returns {boolean} True when the predicate is a protected one.
 */
function isProtectedStructuredIdentity(identity) {
  const { predicate } = identity;
  if (predicate === STATE_ANCHOR_PREDICATE) {
    return true;
  }
  return predicate === STATE_TRANSITION_PREDICATE;
}
|
|
113
|
+
/**
 * Decides whether two entries hang off the same subject.
 *
 * They match when their normalized subjects are equal, or when the left
 * identity has a non-empty subject key that equals the right entry's trimmed,
 * lower-cased subject key.
 *
 * @param {object} left - Entry that owns `leftIdentity`.
 * @param {object} right - Entry being compared against.
 * @param {object} leftIdentity - Structured identity derived from `left`.
 * @returns {boolean} True when the entries share a subject or subject key.
 */
function sharesStructuredSpine(left, right, leftIdentity) {
  if (normalizeSubject(left.subject) === normalizeSubject(right.subject)) {
    return true;
  }
  const identityKey = leftIdentity.subjectKey;
  if (!identityKey) {
    return false;
  }
  // The right entry's key is normalized here; the identity key is compared as given.
  const otherKey = right.subjectKey?.trim().toLowerCase() ?? "";
  return identityKey === otherKey;
}
|
|
116
|
+
/**
 * Derives the structured claim identity for a consolidation entry by
 * reshaping it with `toIdentityEntry` and handing the result to
 * `buildStructuredClaimIdentity`.
 *
 * @param {object} entry - Consolidation entry record.
 * @returns {*} Whatever `buildStructuredClaimIdentity` returns; callers in
 *   this file treat a falsy result as "no structured identity".
 */
function getStructuredConsolidationIdentity(entry) {
  const identityEntry = toIdentityEntry(entry);
  return buildStructuredClaimIdentity(identityEntry);
}
|
|
119
|
+
/**
 * Classifies a pair of entries for consolidation.
 *
 * - "same_claim": both entries have structured identities and they are equal.
 * - "keep_distinct": both have identities that differ, or exactly one has an
 *   identity that is protected (or belongs to a "decision" entry) and the two
 *   entries share a subject or subject key.
 * - "generic": everything else; the caller falls back to its generic checks.
 *
 * @param {object} left - First entry.
 * @param {object} right - Second entry.
 * @returns {"same_claim" | "keep_distinct" | "generic"} Pair classification.
 */
function classifyStructuredConsolidationPair(left, right) {
  const leftIdentity = getStructuredConsolidationIdentity(left);
  const rightIdentity = getStructuredConsolidationIdentity(right);

  if (leftIdentity && rightIdentity) {
    return structuredClaimIdentityEquals(leftIdentity, rightIdentity) ? "same_claim" : "keep_distinct";
  }

  // At most one side is structured from here on.
  const structuredIdentity = leftIdentity ?? rightIdentity;
  if (!structuredIdentity) {
    return "generic";
  }

  const [structuredEntry, genericEntry] = leftIdentity ? [left, right] : [right, left];
  const needsProtection = isProtectedStructuredIdentity(structuredIdentity) || structuredEntry.type === "decision";
  if (needsProtection && sharesStructuredSpine(structuredEntry, genericEntry, structuredIdentity)) {
    return "keep_distinct";
  }
  return "generic";
}
|
|
145
|
+
/**
 * Returns the structured identity shared by every entry in a cluster.
 *
 * @param {object[]} entries - Cluster members.
 * @returns {*} The first entry's identity when every other entry has an equal
 *   identity; `null` when the list is empty, any entry has no identity, or
 *   any identity differs from the first.
 */
function clusterStructuredIdentity(entries) {
  if (entries.length === 0) {
    return null;
  }
  const sharedIdentity = getStructuredConsolidationIdentity(entries[0]);
  if (!sharedIdentity) {
    return null;
  }
  for (const member of entries.slice(1)) {
    const memberIdentity = getStructuredConsolidationIdentity(member);
    const matches = Boolean(memberIdentity) && structuredClaimIdentityEquals(sharedIdentity, memberIdentity);
    if (!matches) {
      return null;
    }
  }
  return sharedIdentity;
}
|
|
161
|
+
|
|
79
162
|
// src/consolidate/util.ts
|
|
80
163
|
var UnionFind = class {
|
|
81
164
|
parent = /* @__PURE__ */ new Map();
|
|
@@ -186,7 +269,7 @@ function toProjectValue(value) {
|
|
|
186
269
|
const s = toStringValue(value).trim();
|
|
187
270
|
return s ? s.toLowerCase() : null;
|
|
188
271
|
}
|
|
189
|
-
function
|
|
272
|
+
/**
 * Normalizes a subject for equality checks: strips leading and trailing
 * whitespace and converts the remainder to lower case.
 *
 * @param {string} value - Raw subject text.
 * @returns {string} Normalized subject.
 */
function normalizeSubject2(value) {
  const withoutPadding = value.trim();
  const lowered = withoutPadding.toLowerCase();
  return lowered;
}
|
|
192
275
|
function collapsePreview(text, maxLength = 80) {
|
|
@@ -414,6 +497,16 @@ async function mergeNearExactDuplicates(db, options) {
|
|
|
414
497
|
expiry,
|
|
415
498
|
importance,
|
|
416
499
|
embedding,
|
|
500
|
+
content_hash,
|
|
501
|
+
source_file,
|
|
502
|
+
source_context,
|
|
503
|
+
subject_entity,
|
|
504
|
+
subject_attribute,
|
|
505
|
+
subject_key,
|
|
506
|
+
claim_predicate,
|
|
507
|
+
claim_object,
|
|
508
|
+
claim_role,
|
|
509
|
+
claim_confidence,
|
|
417
510
|
confirmations,
|
|
418
511
|
COALESCE(re_counts.recall_count, 0) AS recall_count,
|
|
419
512
|
entries.created_at
|
|
@@ -440,6 +533,16 @@ async function mergeNearExactDuplicates(db, options) {
|
|
|
440
533
|
expiry: coerceExpiry(row.expiry),
|
|
441
534
|
importance: Number.isFinite(toNumber(row.importance)) ? toNumber(row.importance) : 5,
|
|
442
535
|
embedding: mapBufferToVector(row.embedding),
|
|
536
|
+
contentHash: toStringValue(row.content_hash) || void 0,
|
|
537
|
+
sourceFile: toStringValue(row.source_file) || void 0,
|
|
538
|
+
sourceContext: toStringValue(row.source_context) || void 0,
|
|
539
|
+
subjectEntity: toStringValue(row.subject_entity).trim() || void 0,
|
|
540
|
+
subjectAttribute: toStringValue(row.subject_attribute).trim() || void 0,
|
|
541
|
+
subjectKey: toStringValue(row.subject_key).trim() || void 0,
|
|
542
|
+
claimPredicate: toStringValue(row.claim_predicate).trim() || void 0,
|
|
543
|
+
claimObject: toStringValue(row.claim_object).trim() || void 0,
|
|
544
|
+
claimRole: toStringValue(row.claim_role).trim() === "current_state" || toStringValue(row.claim_role).trim() === "prior_state" ? toStringValue(row.claim_role).trim() : void 0,
|
|
545
|
+
claimConfidence: Number.isFinite(toNumber(row.claim_confidence)) ? toNumber(row.claim_confidence) : void 0,
|
|
443
546
|
confirmations: Number.isFinite(toNumber(row.confirmations)) ? toNumber(row.confirmations) : 0,
|
|
444
547
|
recallCount: Number.isFinite(toNumber(row.recall_count)) ? toNumber(row.recall_count) : 0,
|
|
445
548
|
createdAt: toStringValue(row.created_at)
|
|
@@ -464,6 +567,14 @@ async function mergeNearExactDuplicates(db, options) {
|
|
|
464
567
|
for (let j = i + 1; j < entries.length; j += 1) {
|
|
465
568
|
const candidate = entries[j];
|
|
466
569
|
checkedPairs += 1;
|
|
570
|
+
const structuredDecision = classifyStructuredConsolidationPair(entry, candidate);
|
|
571
|
+
if (structuredDecision === "keep_distinct") {
|
|
572
|
+
continue;
|
|
573
|
+
}
|
|
574
|
+
if (structuredDecision === "same_claim") {
|
|
575
|
+
unionFind.union(entry.id, candidate.id);
|
|
576
|
+
continue;
|
|
577
|
+
}
|
|
467
578
|
const similarity = cosineSim(entry.embedding, candidate.embedding);
|
|
468
579
|
if (similarity <= MERGE_SIMILARITY_THRESHOLD) {
|
|
469
580
|
continue;
|
|
@@ -471,7 +582,7 @@ async function mergeNearExactDuplicates(db, options) {
|
|
|
471
582
|
if (entry.type !== candidate.type) {
|
|
472
583
|
continue;
|
|
473
584
|
}
|
|
474
|
-
if (
|
|
585
|
+
if (normalizeSubject2(entry.subject) !== normalizeSubject2(candidate.subject)) {
|
|
475
586
|
continue;
|
|
476
587
|
}
|
|
477
588
|
if ((entry.project ?? null) !== (candidate.project ?? null)) {
|
|
@@ -512,6 +623,7 @@ async function mergeNearExactDuplicates(db, options) {
|
|
|
512
623
|
if (!keeper) {
|
|
513
624
|
continue;
|
|
514
625
|
}
|
|
626
|
+
const keeperIdentity = getStructuredConsolidationIdentity(keeper);
|
|
515
627
|
const sources = sorted.slice(1);
|
|
516
628
|
mergedCount += sources.length;
|
|
517
629
|
if (options.verbose) {
|
|
@@ -552,7 +664,17 @@ async function mergeNearExactDuplicates(db, options) {
|
|
|
552
664
|
sql: "UPDATE entries SET superseded_by = ? WHERE id = ?",
|
|
553
665
|
args: [keeper.id, source.id]
|
|
554
666
|
});
|
|
555
|
-
|
|
667
|
+
const sourceIdentity = getStructuredConsolidationIdentity(source);
|
|
668
|
+
if (keeperIdentity && sourceIdentity && keeperIdentity.type === sourceIdentity.type && keeperIdentity.subjectKey === sourceIdentity.subjectKey && keeperIdentity.predicate === sourceIdentity.predicate && keeperIdentity.object === sourceIdentity.object && keeperIdentity.role === sourceIdentity.role && source.contentHash) {
|
|
669
|
+
await recordEntrySupport(db, {
|
|
670
|
+
entryId: keeper.id,
|
|
671
|
+
sourceContentHash: source.contentHash,
|
|
672
|
+
sourceFile: source.sourceFile,
|
|
673
|
+
sourceContext: source.sourceContext,
|
|
674
|
+
sourceCreatedAt: source.createdAt,
|
|
675
|
+
claimConfidence: source.claimConfidence
|
|
676
|
+
});
|
|
677
|
+
}
|
|
556
678
|
await db.execute({
|
|
557
679
|
sql: `
|
|
558
680
|
INSERT OR IGNORE INTO tags (entry_id, tag)
|
|
@@ -590,8 +712,10 @@ async function mergeNearExactDuplicates(db, options) {
|
|
|
590
712
|
}
|
|
591
713
|
async function cleanOrphanedRelations(db, dryRun) {
|
|
592
714
|
const orphanedCount = await countOrphanedRelations(db);
|
|
593
|
-
|
|
594
|
-
|
|
715
|
+
const supersedesResidueCount = await countInactiveSupersedesRelations(db);
|
|
716
|
+
const total = orphanedCount + supersedesResidueCount;
|
|
717
|
+
if (dryRun || total === 0) {
|
|
718
|
+
return total;
|
|
595
719
|
}
|
|
596
720
|
await db.execute({
|
|
597
721
|
sql: `
|
|
@@ -604,7 +728,26 @@ async function cleanOrphanedRelations(db, dryRun) {
|
|
|
604
728
|
`,
|
|
605
729
|
args: []
|
|
606
730
|
});
|
|
607
|
-
|
|
731
|
+
await deleteSupersedesRelationsToInactiveTargets(db);
|
|
732
|
+
return total;
|
|
733
|
+
}
|
|
734
|
+
/**
 * Counts rows in `relations` of type 'supersedes' whose `target_id` refers to
 * an entry that is either superseded (`superseded_by IS NOT NULL`) or retired
 * (`retired = 1`).
 *
 * @param {object} db - Database handle exposing `execute({ sql, args })`.
 * @returns {Promise<number>} The matching row count, or 0 when the query
 *   yields no finite number.
 */
async function countInactiveSupersedesRelations(db) {
  const result = await db.execute({
    sql: `
      SELECT COUNT(*) AS count
      FROM relations
      WHERE relation_type = 'supersedes'
        AND target_id IN (
          SELECT id
          FROM entries
          WHERE superseded_by IS NOT NULL
             OR retired = 1
        )
    `,
    args: []
  });
  // Optional chaining covers an empty result set; the finite check covers a non-numeric value.
  const count = toNumber(result.rows[0]?.count);
  return Number.isFinite(count) ? count : 0;
}
|
|
609
752
|
async function ensureExpiredSentinel(db) {
|
|
610
753
|
await db.execute({
|
|
@@ -787,17 +930,41 @@ var LLM_DEDUP_BATCH_TOOL = {
|
|
|
787
930
|
description: "Return dedup results for all pairs",
|
|
788
931
|
parameters: LLM_DEDUP_BATCH_TOOL_SCHEMA
|
|
789
932
|
};
|
|
933
|
+
/**
 * Renders one entry as a labelled text block for the dedup prompt.
 *
 * The label and content lines are always emitted. Each optional field is
 * appended only when it is present and non-blank after trimming; the value is
 * printed untrimmed.
 *
 * @param {string} label - Heading for the block, e.g. "Entry A".
 * @param {object} entry - Entry whose fields are rendered.
 * @returns {string} Newline-joined block.
 */
function formatEntry(label, entry) {
  const optionalFields = [
    ["Type", entry.type],
    ["Subject", entry.subject],
    ["Subject key", entry.subjectKey],
    ["Claim predicate", entry.claimPredicate],
    ["Claim object", entry.claimObject],
    ["Claim role", entry.claimRole]
  ];
  const optionalLines = optionalFields
    .filter(([, value]) => value?.trim())
    .map(([title, value]) => ` ${title}: ${value}`);
  return [`${label}:`, ` Content: ${entry.content}`, ...optionalLines].join("\n");
}
|
|
790
955
|
function buildLlmDedupBatchContext(pairs) {
|
|
791
956
|
const systemPrompt = [
|
|
792
957
|
"You are a deduplication assistant for knowledge entries.",
|
|
793
958
|
"For each numbered pair, decide if they express the same knowledge.",
|
|
959
|
+
"Same structured identity or same durable claim in different wording means same=true.",
|
|
960
|
+
"Different claim roles, different transition directions, or different durable values mean same=false even when the topic is shared.",
|
|
794
961
|
"Call batch_dedup_check once with your results."
|
|
795
962
|
].join("\n");
|
|
796
963
|
const pairBlocks = pairs.map(
|
|
797
964
|
(pair, index) => [
|
|
798
965
|
`Pair ${index + 1}:`,
|
|
799
|
-
|
|
800
|
-
|
|
966
|
+
formatEntry("Entry A", pair.entry),
|
|
967
|
+
formatEntry("Entry B", pair.candidate)
|
|
801
968
|
].join("\n")
|
|
802
969
|
).join("\n\n");
|
|
803
970
|
return {
|
|
@@ -824,7 +991,7 @@ var DEFAULT_NEIGHBOR_LIMIT = 20;
|
|
|
824
991
|
var MAX_ACTIVE_EMBEDDED_ENTRIES2 = 2e4;
|
|
825
992
|
var LLM_DEDUP_BATCH_SIZE = 10;
|
|
826
993
|
var LLM_DEDUP_CONCURRENCY = 5;
|
|
827
|
-
function
|
|
994
|
+
/**
 * Returns the subject with outer whitespace removed and all characters
 * lower-cased, so subjects can be compared case-insensitively.
 *
 * @param {string} value - Raw subject text.
 * @returns {string} Normalized subject.
 */
function normalizeSubject3(value) {
  const stripped = value.trim();
  return stripped.toLowerCase();
}
|
|
830
997
|
function pairKey(a, b) {
|
|
@@ -925,6 +1092,16 @@ function mapActiveEmbeddedEntry(row) {
|
|
|
925
1092
|
expiry: coerceExpiry(row.expiry),
|
|
926
1093
|
importance: Number.isFinite(toNumber(row.importance)) ? toNumber(row.importance) : 5,
|
|
927
1094
|
embedding,
|
|
1095
|
+
contentHash: toStringValue(row.content_hash) || void 0,
|
|
1096
|
+
sourceFile: toStringValue(row.source_file) || void 0,
|
|
1097
|
+
sourceContext: toStringValue(row.source_context) || void 0,
|
|
1098
|
+
subjectEntity: toStringValue(row.subject_entity).trim() || void 0,
|
|
1099
|
+
subjectAttribute: toStringValue(row.subject_attribute).trim() || void 0,
|
|
1100
|
+
subjectKey: toStringValue(row.subject_key).trim() || void 0,
|
|
1101
|
+
claimPredicate: toStringValue(row.claim_predicate).trim() || void 0,
|
|
1102
|
+
claimObject: toStringValue(row.claim_object).trim() || void 0,
|
|
1103
|
+
claimRole: toStringValue(row.claim_role).trim() === "current_state" || toStringValue(row.claim_role).trim() === "prior_state" ? toStringValue(row.claim_role).trim() : void 0,
|
|
1104
|
+
claimConfidence: Number.isFinite(toNumber(row.claim_confidence)) ? toNumber(row.claim_confidence) : void 0,
|
|
928
1105
|
confirmations: Number.isFinite(toNumber(row.confirmations)) ? toNumber(row.confirmations) : 0,
|
|
929
1106
|
recallCount: Number.isFinite(toNumber(row.recall_count)) ? toNumber(row.recall_count) : 0,
|
|
930
1107
|
createdAt: toStringValue(row.created_at),
|
|
@@ -974,6 +1151,16 @@ async function buildClusters(db, options = {}) {
|
|
|
974
1151
|
e.expiry,
|
|
975
1152
|
e.importance,
|
|
976
1153
|
e.embedding,
|
|
1154
|
+
e.content_hash,
|
|
1155
|
+
e.source_file,
|
|
1156
|
+
e.source_context,
|
|
1157
|
+
e.subject_entity,
|
|
1158
|
+
e.subject_attribute,
|
|
1159
|
+
e.subject_key,
|
|
1160
|
+
e.claim_predicate,
|
|
1161
|
+
e.claim_object,
|
|
1162
|
+
e.claim_role,
|
|
1163
|
+
e.claim_confidence,
|
|
977
1164
|
e.confirmations,
|
|
978
1165
|
COALESCE(re_counts.recall_count, 0) AS recall_count,
|
|
979
1166
|
e.created_at,
|
|
@@ -1022,10 +1209,18 @@ async function buildClusters(db, options = {}) {
|
|
|
1022
1209
|
const entry = candidates[i];
|
|
1023
1210
|
for (let j = i + 1; j < candidates.length; j += 1) {
|
|
1024
1211
|
const candidate = candidates[j];
|
|
1212
|
+
const structuredDecision = classifyStructuredConsolidationPair(entry, candidate);
|
|
1213
|
+
if (structuredDecision === "keep_distinct") {
|
|
1214
|
+
continue;
|
|
1215
|
+
}
|
|
1216
|
+
if (structuredDecision === "same_claim") {
|
|
1217
|
+
unionFind.union(entry.id, candidate.id);
|
|
1218
|
+
continue;
|
|
1219
|
+
}
|
|
1025
1220
|
const similarity = cosineSim(entry.embedding, candidate.embedding);
|
|
1026
1221
|
const key = pairKey(entry.id, candidate.id);
|
|
1027
1222
|
const sameType = entry.type === candidate.type;
|
|
1028
|
-
const sameSubject =
|
|
1223
|
+
const sameSubject = normalizeSubject3(entry.subject) === normalizeSubject3(candidate.subject);
|
|
1029
1224
|
if (sameType && similarity >= simThreshold || sameSubject && similarity >= CROSS_TYPE_SUBJECT_THRESHOLD) {
|
|
1030
1225
|
unionFind.union(entry.id, candidate.id);
|
|
1031
1226
|
continue;
|
|
@@ -1099,6 +1294,10 @@ async function buildClusters(db, options = {}) {
|
|
|
1099
1294
|
if (group.length < minCluster) {
|
|
1100
1295
|
continue;
|
|
1101
1296
|
}
|
|
1297
|
+
if (clusterStructuredIdentity(group)) {
|
|
1298
|
+
clusters.push({ entries: group });
|
|
1299
|
+
continue;
|
|
1300
|
+
}
|
|
1102
1301
|
let usedLooseUnion = false;
|
|
1103
1302
|
for (let i = 0; i < group.length && !usedLooseUnion; i += 1) {
|
|
1104
1303
|
for (let j = i + 1; j < group.length; j += 1) {
|
|
@@ -1184,7 +1383,12 @@ function formatClusterEntries(cluster, contentLimit) {
|
|
|
1184
1383
|
`- confirmations: ${entry.confirmations}`,
|
|
1185
1384
|
`- created_at: ${entry.createdAt}`,
|
|
1186
1385
|
`- content: ${content}`,
|
|
1187
|
-
`- tags: ${(entry.tags ?? []).join(", ") || "(none)"}
|
|
1386
|
+
`- tags: ${(entry.tags ?? []).join(", ") || "(none)"}`,
|
|
1387
|
+
...entry.subjectKey ? [`- subject_key: ${entry.subjectKey}`] : [],
|
|
1388
|
+
...entry.claimPredicate ? [`- claim_predicate: ${entry.claimPredicate}`] : [],
|
|
1389
|
+
...entry.claimObject ? [`- claim_object: ${entry.claimObject}`] : [],
|
|
1390
|
+
...entry.claimRole ? [`- claim_role: ${entry.claimRole}`] : [],
|
|
1391
|
+
...typeof entry.claimConfidence === "number" && Number.isFinite(entry.claimConfidence) ? [`- claim_confidence: ${entry.claimConfidence.toFixed(3)}`] : []
|
|
1188
1392
|
].join("\n");
|
|
1189
1393
|
}).join("\n\n");
|
|
1190
1394
|
}
|
|
@@ -1193,11 +1397,13 @@ function buildMergeContext(cluster) {
|
|
|
1193
1397
|
"You are a knowledge consolidation engine.",
|
|
1194
1398
|
"Merge the provided related entries into one or more canonical entries.",
|
|
1195
1399
|
"Only include information explicitly stated in the source entries. Do not infer or add details not present.",
|
|
1196
|
-
"
|
|
1400
|
+
"Do not turn distinct structured identities, role distinctions, or changed durable values into one blended blob summary.",
|
|
1401
|
+
"current_state, prior_state, and state_transition anchors must remain distinct. Split them into separate outputs when needed.",
|
|
1402
|
+
"If entries share one underlying claim, keep the merged output narrow and canonical rather than writing a broad synthesis narrative.",
|
|
1197
1403
|
"The merged entry's importance must not exceed the highest importance of its source entries. Merging entries does not make them more important - it makes them more concise. Do not inflate importance.",
|
|
1198
1404
|
"Each merged entry's content must be under 600 characters.",
|
|
1199
1405
|
"If the source entries cover distinct sub-topics that cannot be combined under 600 characters, call merge_entries multiple times - once for each focused sub-topic entry.",
|
|
1200
|
-
"Prefer splitting into focused entries over producing one large combined entry.",
|
|
1406
|
+
"Prefer splitting into focused entries over producing one large combined entry, especially when state roles or values differ.",
|
|
1201
1407
|
"Call merge_entries once per final merged entry.",
|
|
1202
1408
|
'expiry must be exactly the string "permanent" or "temporary" -- never a date, timestamp, or other value.'
|
|
1203
1409
|
].join("\n");
|
|
@@ -1397,6 +1603,143 @@ function chooseDominantType(cluster) {
|
|
|
1397
1603
|
});
|
|
1398
1604
|
return sorted[0]?.[0] ?? "fact";
|
|
1399
1605
|
}
|
|
1606
|
+
/**
 * Sort comparator that orders the preferred keeper first.
 *
 * Entries with a higher `confirmations + recallCount` total come first. On a
 * tie, the entry with the later `createdAt` comes first; a timestamp that
 * does not parse counts as 0.
 *
 * @param {object} a - First entry.
 * @param {object} b - Second entry.
 * @returns {number} Negative when `a` sorts before `b`, positive when after.
 */
function compareKeeperPriority(a, b) {
  const engagementOf = (entry) => entry.confirmations + entry.recallCount;
  const createdMsOf = (entry) => {
    const parsed = Date.parse(entry.createdAt);
    return Number.isFinite(parsed) ? parsed : 0;
  };

  const engagementA = engagementOf(a);
  const engagementB = engagementOf(b);
  // Compare for equality before subtracting so equal infinite scores still fall through to the date tie-break.
  if (engagementB !== engagementA) {
    return engagementB - engagementA;
  }
  return createdMsOf(b) - createdMsOf(a);
}
|
|
1618
|
+
/**
 * Collapses a cluster into a single keeper entry without synthesizing new
 * content: the highest-priority entry (per `compareKeeperPriority`) is kept
 * and every other entry is retired as superseded by it.
 *
 * In a dry run nothing is written; only the would-be keeper id is reported.
 * Otherwise all writes happen between BEGIN IMMEDIATE and COMMIT, and any
 * failure triggers a ROLLBACK before the error is rethrown.
 *
 * @param {object} db - Database handle exposing `execute`.
 * @param {{ entries: object[] }} cluster - Cluster whose entries are folded together.
 * @param {object} [options] - Reads `dryRun`, `verbose` and `onLog`.
 * @returns {Promise<{ mergedEntryId: string, sourceIds: string[], flagged: boolean }>}
 *   `sourceIds` lists every entry id in the cluster, the keeper included.
 * @throws {Error} When the cluster has no entries (non-dry-run only), or when
 *   a database call fails.
 */
async function reinforceStructuredCluster(db, cluster, options = {}) {
  const sourceIds = cluster.entries.map((entry) => entry.id);
  if (options.dryRun) {
    // Report which entry would be kept; an empty cluster yields the "DRY_RUN" placeholder.
    const keeper2 = [...cluster.entries].sort(compareKeeperPriority)[0];
    return {
      mergedEntryId: keeper2?.id ?? "DRY_RUN",
      sourceIds,
      flagged: false
    };
  }
  // Sort a copy so the caller's array order is left untouched.
  const sorted = [...cluster.entries].sort(compareKeeperPriority);
  const keeper = sorted[0];
  if (!keeper) {
    throw new Error("[consolidate] Missing keeper for structured reinforcement cluster");
  }
  const sources = sorted.slice(1);
  // Aggregates are computed over the whole cluster, keeper included.
  const totalConfirmations = sorted.reduce((sum, entry) => sum + entry.confirmations, 0);
  const maxImportance = sorted.reduce((max, entry) => Math.max(max, entry.importance ?? 5), 0);
  const highestExpiry = resolveHighestExpiry(sorted.map((entry) => entry.expiry));
  // Default string sort; yields the earliest timestamp only if the values sort
  // chronologically as text (presumably ISO-8601 - TODO confirm).
  const oldestCreatedAt = sorted.map((entry) => entry.createdAt).filter((date) => Boolean(date) && date.length > 0).sort()[0];
  const retiredAt = (/* @__PURE__ */ new Date()).toISOString();
  await db.execute("BEGIN IMMEDIATE");
  try {
    for (const source of sources) {
      // Record the source's pre-merge counters against the keeper.
      await db.execute({
        sql: `
          INSERT INTO entry_sources (
            merged_entry_id,
            source_entry_id,
            original_confirmations,
            original_recall_count,
            original_created_at
          )
          VALUES (?, ?, ?, ?, ?)
        `,
        args: [keeper.id, source.id, source.confirmations, source.recallCount, source.createdAt]
      });
      // Retire a still-active source and point it at the keeper.
      await db.execute({
        sql: `
          UPDATE entries
          SET superseded_by = ?,
              retired = 1,
              retired_at = ?,
              retired_reason = ?
          WHERE id = ?
            AND retired = 0
        `,
        args: [keeper.id, retiredAt, "superseded", source.id]
      });
      // A source that was already retired keeps its retirement metadata; only
      // a missing superseded_by link is filled in.
      await db.execute({
        sql: `
          UPDATE entries
          SET superseded_by = ?
          WHERE id = ?
            AND retired = 1
            AND superseded_by IS NULL
        `,
        args: [keeper.id, source.id]
      });
      // Re-home the source's recall history onto the keeper.
      await db.execute({
        sql: "UPDATE recall_events SET entry_id = ? WHERE entry_id = ?",
        args: [keeper.id, source.id]
      });
      // Copy the source's tags to the keeper; OR IGNORE skips rows that conflict.
      await db.execute({
        sql: `
          INSERT OR IGNORE INTO tags (entry_id, tag)
          SELECT ?, tag
          FROM tags
          WHERE entry_id = ?
        `,
        args: [keeper.id, source.id]
      });
      // Support is only recorded for sources that carry a content hash.
      if (source.contentHash) {
        await recordEntrySupport(db, {
          entryId: keeper.id,
          sourceContentHash: source.contentHash,
          sourceFile: source.sourceFile,
          sourceContext: source.sourceContext,
          sourceCreatedAt: source.createdAt,
          claimConfidence: source.claimConfidence
        });
      }
    }
    await deleteCoRecallEdgesForEntryIds(db, sources.map((source) => source.id));
    // Fold the aggregates into the keeper: importance is only raised and
    // created_at is only moved earlier.
    await db.execute({
      sql: `
        UPDATE entries
        SET merged_from = COALESCE(merged_from, 0) + ?,
            consolidated_at = datetime('now'),
            confirmations = ?,
            expiry = ?,
            importance = CASE WHEN importance < ? THEN ? ELSE importance END,
            created_at = CASE WHEN created_at > ? THEN ? ELSE created_at END
        WHERE id = ?
      `,
      args: [
        sources.length,
        totalConfirmations,
        highestExpiry,
        maxImportance,
        maxImportance,
        oldestCreatedAt ?? keeper.createdAt,
        oldestCreatedAt ?? keeper.createdAt,
        keeper.id
      ]
    });
    await db.execute("COMMIT");
  } catch (error) {
    try {
      await db.execute("ROLLBACK");
    } catch {
      // A failed rollback is ignored so the original error is the one rethrown.
    }
    throw error;
  }
  if (options.verbose) {
    (options.onLog ?? (() => void 0))(
      `[merge] reinforced=${keeper.id} sources=${sources.map((source) => source.id).join(",")}`
    );
  }
  return {
    mergedEntryId: keeper.id,
    sourceIds,
    flagged: false
  };
}
|
|
1400
1743
|
async function mergeCluster(db, cluster, llmClient, apiKey, options = {}) {
|
|
1401
1744
|
const sourceIds = cluster.entries.map((entry) => entry.id);
|
|
1402
1745
|
const onLog = options.onLog ?? (() => void 0);
|
|
@@ -1421,6 +1764,9 @@ async function mergeCluster(db, cluster, llmClient, apiKey, options = {}) {
|
|
|
1421
1764
|
onLog(
|
|
1422
1765
|
`[merge-llm] Merging cluster of ${cluster.entries.length} entries (subject: "${subjectPreview || "unknown"}")`
|
|
1423
1766
|
);
|
|
1767
|
+
if (clusterStructuredIdentity(cluster.entries)) {
|
|
1768
|
+
return reinforceStructuredCluster(db, cluster, options);
|
|
1769
|
+
}
|
|
1424
1770
|
const sourceContents = cluster.entries.map((entry) => entry.content);
|
|
1425
1771
|
const queueFlaggedMerge = async (result, reason) => {
|
|
1426
1772
|
if (options.dryRun) {
|
|
@@ -1615,7 +1961,6 @@ async function mergeCluster(db, cluster, llmClient, apiKey, options = {}) {
|
|
|
1615
1961
|
`,
|
|
1616
1962
|
args: [mergedEntryId, source.id, source.confirmations, originalRecallCount, source.createdAt]
|
|
1617
1963
|
});
|
|
1618
|
-
await createRelation(db, mergedEntryId, source.id, "supersedes");
|
|
1619
1964
|
}
|
|
1620
1965
|
}
|
|
1621
1966
|
for (const source of cluster.entries) {
|
|
@@ -2908,13 +3253,21 @@ function toConflictLogRow(row) {
|
|
|
2908
3253
|
}
|
|
2909
3254
|
/**
 * Maps a raw `entries` row onto the conflict-entry shape.
 *
 * Text columns that come back empty become `null`; `importance` falls back to
 * 0 and `claimConfidence` to `null` when not finite; `claimRole` is kept only
 * when it is exactly "current_state" or "prior_state".
 *
 * @param {object} row - Database row with snake_case columns.
 * @returns {object} Conflict entry with camelCase fields.
 */
function toConflictEntry(row) {
  const importance = toNumber(row.importance);
  const claimConfidence = toNumber(row.claim_confidence);
  const claimRole = toStringValue(row.claim_role);
  const textOrNull = (value) => toStringValue(value) || null;
  const isKnownRole = claimRole === "current_state" || claimRole === "prior_state";

  return {
    id: toStringValue(row.id),
    type: toStringValue(row.type),
    subject: toStringValue(row.subject),
    content: toStringValue(row.content),
    importance: Number.isFinite(importance) ? importance : 0,
    subjectEntity: textOrNull(row.subject_entity),
    subjectAttribute: textOrNull(row.subject_attribute),
    subjectKey: textOrNull(row.subject_key),
    claimPredicate: textOrNull(row.claim_predicate),
    claimObject: textOrNull(row.claim_object),
    claimRole: isKnownRole ? claimRole : null,
    claimConfidence: Number.isFinite(claimConfidence) ? claimConfidence : null,
    createdAt: toStringValue(row.created_at)
  };
}
|
|
@@ -2929,7 +3282,20 @@ async function getEntriesByIds(db, entryIds) {
|
|
|
2929
3282
|
const placeholders = chunk.map(() => "?").join(", ");
|
|
2930
3283
|
const rows = await db.execute({
|
|
2931
3284
|
sql: `
|
|
2932
|
-
SELECT
|
|
3285
|
+
SELECT
|
|
3286
|
+
id,
|
|
3287
|
+
type,
|
|
3288
|
+
subject,
|
|
3289
|
+
content,
|
|
3290
|
+
importance,
|
|
3291
|
+
subject_entity,
|
|
3292
|
+
subject_attribute,
|
|
3293
|
+
subject_key,
|
|
3294
|
+
claim_predicate,
|
|
3295
|
+
claim_object,
|
|
3296
|
+
claim_role,
|
|
3297
|
+
claim_confidence,
|
|
3298
|
+
created_at
|
|
2933
3299
|
FROM entries
|
|
2934
3300
|
WHERE id IN (${placeholders})
|
|
2935
3301
|
`,
|
|
@@ -3016,7 +3382,7 @@ async function getPendingConflictsWithEntries(db, options) {
|
|
|
3016
3382
|
async function resolveConflict(opts) {
|
|
3017
3383
|
const { db, conflictId, resolution } = opts;
|
|
3018
3384
|
const resolvedBy = opts.resolvedBy ?? "user";
|
|
3019
|
-
|
|
3385
|
+
let reasoning = opts.reasoning ?? null;
|
|
3020
3386
|
log2.info(`Resolving conflict ${conflictId} as ${resolution} by ${resolvedBy}`);
|
|
3021
3387
|
try {
|
|
3022
3388
|
const conflictRow = await getConflictRowById(db, conflictId);
|
|
@@ -3034,10 +3400,20 @@ async function resolveConflict(opts) {
|
|
|
3034
3400
|
log2.info(
|
|
3035
3401
|
`Conflict: ${conflict.relation} between "${conflict.entryA.subject}" and "${conflict.entryB.subject}" (confidence: ${conflict.confidence})`
|
|
3036
3402
|
);
|
|
3403
|
+
const structuredGuard = classifyStructuredConflictPair(
|
|
3404
|
+
conflict.entryA,
|
|
3405
|
+
conflict.entryB
|
|
3406
|
+
);
|
|
3407
|
+
const effectiveResolution = structuredGuard.decision === "keep_both" && (resolution === "keep-new" || resolution === "keep-old") ? "keep-both" : resolution;
|
|
3408
|
+
if (effectiveResolution !== resolution) {
|
|
3409
|
+
const guardReason = `Protected canonical structured pair: ${structuredGuard.reason}.`;
|
|
3410
|
+
reasoning = reasoning ? `${reasoning} ${guardReason}` : guardReason;
|
|
3411
|
+
log2.info(`Conflict ${conflictId}: overriding ${resolution} to keep-both (${structuredGuard.reason})`);
|
|
3412
|
+
}
|
|
3037
3413
|
let retiredEntryId = null;
|
|
3038
|
-
if (
|
|
3414
|
+
if (effectiveResolution === "keep-new") {
|
|
3039
3415
|
retiredEntryId = conflictRow.existingEntryId;
|
|
3040
|
-
} else if (
|
|
3416
|
+
} else if (effectiveResolution === "keep-old") {
|
|
3041
3417
|
retiredEntryId = conflictRow.newEntryId;
|
|
3042
3418
|
}
|
|
3043
3419
|
const transaction = await db.transaction("write");
|
|
@@ -3062,7 +3438,7 @@ async function resolveConflict(opts) {
|
|
|
3062
3438
|
args: [retiredEntryId]
|
|
3063
3439
|
});
|
|
3064
3440
|
}
|
|
3065
|
-
await resolveConflictLog(transaction, conflictId,
|
|
3441
|
+
await resolveConflictLog(transaction, conflictId, effectiveResolution, { reasoning, resolvedBy });
|
|
3066
3442
|
await transaction.commit();
|
|
3067
3443
|
} catch (error) {
|
|
3068
3444
|
try {
|
|
@@ -3072,11 +3448,11 @@ async function resolveConflict(opts) {
|
|
|
3072
3448
|
throw error;
|
|
3073
3449
|
}
|
|
3074
3450
|
log2.info(
|
|
3075
|
-
`Resolved: ${
|
|
3451
|
+
`Resolved: ${effectiveResolution}, retired entry: ${retiredEntryId ?? "none"}, reasoning length: ${reasoning?.length ?? 0}`
|
|
3076
3452
|
);
|
|
3077
3453
|
return {
|
|
3078
3454
|
conflictId,
|
|
3079
|
-
resolution,
|
|
3455
|
+
resolution: effectiveResolution,
|
|
3080
3456
|
resolvedBy,
|
|
3081
3457
|
reasoning,
|
|
3082
3458
|
retiredEntryId
|