agenr 0.12.1 → 0.12.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -1,5 +1,28 @@
1
1
  # Changelog
2
2
 
3
+ ## [0.12.2] - 2026-03-22
4
+
5
+ ### Recall Scoping Fixes
6
+
7
+ - **Wildcard project passthrough.** `project: "*"` no longer gets silently dropped to `undefined` during recall request building. The wildcard marker now flows through the full stack to `hasWildcardProjectOverride` in `prepareRecallInputs`, enabling true cross-project recall.
8
+ - **Default project fallback for unscoped recall.** When an agent calls `agenr_recall` without an explicit `project`, the session's default project is now used as a `universal_first` hint — searching the default project first with null-project fallback. Previously, unscoped recall defaulted to null-project-only entries, silently returning near-empty results for project-heavy corpora.
9
+ - **Wildcard default when no project context.** When neither an explicit project nor a session default is available, unscoped recall now defaults to wildcard (`*`) cross-project search instead of null-project-only.
10
+
11
+ ### Browse Recall
12
+
13
+ - **Temporal proximity rebalancing.** Browse mode recall now prioritizes temporal proximity over importance and applies a diversity pass, better surfacing recent entries during temporal exploration.
14
+ - **Removed default 1d since window.** Browse mode no longer applies a default 1-day `since` window, allowing full temporal exploration of the corpus.
15
+
16
+ ### Update & Retire Improvements
17
+
18
+ - **Expiry changes via `agenr_update`.** The update tool now supports changing an entry's expiry tier (`core` → `permanent`, etc.) without retiring and re-creating it. Entry history (confirmations, recall count, created_at) is preserved.
19
+ - **Subject selectors for update and retire.** Both `agenr_update` and `agenr_retire` now accept `subject` as an alternative to `entry_id`. Subject matching uses case-insensitive exact matching; when multiple entries share a subject, the most recent one is targeted.
20
+ - **Agent action replay.** Retire and update operations now support recall target hints for agent action replay workflows.
21
+
22
+ ### Maintenance
23
+
24
+ - **Vector integrity detection.** New `vector-integrity` maintain task detects and repairs drift between the vector shadow table and the entries table.
25
+
3
26
  ## [0.12.1] - 2026-03-21
4
27
 
5
28
  ### Post-Ingest Quality Fixes
package/README.md CHANGED
@@ -162,7 +162,7 @@ agenr recall "package manager"
162
162
  tags: tooling, package-manager
163
163
  ```
164
164
 
165
- Recall supports date range queries (`--since 14d --until 7d`), temporal browse mode (`--browse --since 1d`), and around-date targeting (`--around 2026-02-15 --around-radius 14`) to rank entries by distance from a specific date.
165
+ Recall supports date range queries (`--since 14d --until 7d`), temporal browse mode (`--browse --since 1d`), and around-date targeting (`--around 2026-02-15 --around-radius 14`) to rank entries primarily by distance from a specific date with importance used as a secondary tiebreaker.
166
166
 
167
167
  ### Cross-session Handoff
168
168
 
@@ -257,7 +257,7 @@ This exposes four MCP tools: `agenr_recall`, `agenr_extract`, `agenr_retire`, an
257
257
  | `agenr store [files...]` | Store entries with semantic dedup |
258
258
  | `agenr recall [query]` | Semantic + memory-aware recall. Use `--since`/`--until` for date ranges, `--around` for target-date ranking, `--browse` for temporal mode. |
259
259
  | `agenr retire [subject]` | Retire a stale entry (hidden, not deleted). Match by subject or `--id`. |
260
- | `agenr update --id <id> --importance <n>` | Update an entry in place. Currently supports importance only. |
260
+ | `agenr update --id <id> [--importance <n>] [--expiry <level>]` | Update mutable entry metadata in place. Supports importance and expiry; pass at least one. |
261
261
  | `agenr watch [file]` | Live-watch files/directories, auto-extract knowledge |
262
262
  | `agenr watcher install` | Install background watch daemon (macOS launchd) |
263
263
  | `agenr watcher status` | Show daemon status (running/stopped, pid, watched file, recent logs) |
@@ -0,0 +1,34 @@
1
+ // src/domain/watcher-demotion.ts
2
+ var WATCHER_DEMOTED_TAG = "watcher-demoted";
3
+ var DEFAULT_OPENCLAW_WATCHER_DEMOTION_IMPORTANCE_CAP = 4;
4
+ var DEFAULT_OPENCLAW_WATCHER_DEMOTION_TTL_DAYS = 30;
5
+ function clampInteger(value, floor, fallback) {
6
+ if (typeof value !== "number" || !Number.isFinite(value)) {
7
+ return fallback;
8
+ }
9
+ return Math.max(floor, Math.floor(value));
10
+ }
11
+ function resolveOpenClawWatcherDemotionConfig(config) {
12
+ const watcherConfig = config?.watcher?.openclawDemotion;
13
+ return {
14
+ enabled: watcherConfig?.enabled !== false,
15
+ importanceCap: Math.min(
16
+ 10,
17
+ clampInteger(
18
+ watcherConfig?.importanceCap,
19
+ 1,
20
+ DEFAULT_OPENCLAW_WATCHER_DEMOTION_IMPORTANCE_CAP
21
+ )
22
+ ),
23
+ ttlDays: clampInteger(
24
+ watcherConfig?.ttlDays,
25
+ 1,
26
+ DEFAULT_OPENCLAW_WATCHER_DEMOTION_TTL_DAYS
27
+ )
28
+ };
29
+ }
30
+
31
+ export {
32
+ WATCHER_DEMOTED_TAG,
33
+ resolveOpenClawWatcherDemotionConfig
34
+ };
@@ -47,7 +47,7 @@ import {
47
47
  createAppRuntime,
48
48
  readAppDbSession,
49
49
  resolveDefaultAppRuntimeDeps
50
- } from "./chunk-BD3SRNYZ.js";
50
+ } from "./chunk-YFOFO2FC.js";
51
51
  import {
52
52
  mapStoredEntry,
53
53
  shapeRecallText
@@ -65,7 +65,7 @@ import {
65
65
  import {
66
66
  createLogger,
67
67
  walCheckpoint
68
- } from "./chunk-4GIJ227W.js";
68
+ } from "./chunk-HMBONTF3.js";
69
69
  import {
70
70
  DEFAULT_TASK_MODEL,
71
71
  resolveClaimExtractionBatchSize,
@@ -948,6 +948,37 @@ async function buildEntityRegistry(db) {
948
948
  return buildEntityRegistryFromRows(rows);
949
949
  }
950
950
 
951
+ // src/utils/expiry.ts
952
+ var EXPIRY_SET = new Set(EXPIRY_LEVELS);
953
+ var EXPIRY_PRIORITY = {
954
+ temporary: 0,
955
+ permanent: 1,
956
+ core: 2
957
+ };
958
+ function normalizeExpiry(value) {
959
+ if (typeof value !== "string") {
960
+ return void 0;
961
+ }
962
+ const normalized = value.trim().toLowerCase();
963
+ if (!normalized) {
964
+ return void 0;
965
+ }
966
+ return EXPIRY_SET.has(normalized) ? normalized : void 0;
967
+ }
968
+ function coerceExpiry(value, fallback = "temporary") {
969
+ return normalizeExpiry(value) ?? fallback;
970
+ }
971
+ function resolveHigherExpiry(a, b) {
972
+ return EXPIRY_PRIORITY[a] >= EXPIRY_PRIORITY[b] ? a : b;
973
+ }
974
+ function resolveHighestExpiry(expiries) {
975
+ let highest = "temporary";
976
+ for (const expiry of expiries) {
977
+ highest = resolveHigherExpiry(highest, expiry);
978
+ }
979
+ return highest;
980
+ }
981
+
951
982
  // src/db/lockfile.ts
952
983
  import fs from "fs";
953
984
  import os from "os";
@@ -2301,37 +2332,6 @@ function evaluateConflictsForRejection(conflicts, newEntry, config) {
2301
2332
  };
2302
2333
  }
2303
2334
 
2304
- // src/utils/expiry.ts
2305
- var EXPIRY_SET = new Set(EXPIRY_LEVELS);
2306
- var EXPIRY_PRIORITY = {
2307
- temporary: 0,
2308
- permanent: 1,
2309
- core: 2
2310
- };
2311
- function normalizeExpiry(value) {
2312
- if (typeof value !== "string") {
2313
- return void 0;
2314
- }
2315
- const normalized = value.trim().toLowerCase();
2316
- if (!normalized) {
2317
- return void 0;
2318
- }
2319
- return EXPIRY_SET.has(normalized) ? normalized : void 0;
2320
- }
2321
- function coerceExpiry(value, fallback = "temporary") {
2322
- return normalizeExpiry(value) ?? fallback;
2323
- }
2324
- function resolveHigherExpiry(a, b) {
2325
- return EXPIRY_PRIORITY[a] >= EXPIRY_PRIORITY[b] ? a : b;
2326
- }
2327
- function resolveHighestExpiry(expiries) {
2328
- let highest = "temporary";
2329
- for (const expiry of expiries) {
2330
- highest = resolveHigherExpiry(highest, expiry);
2331
- }
2332
- return highest;
2333
- }
2334
-
2335
2335
  // src/memory/store/mutations.ts
2336
2336
  async function applyEntryMutation(repository, processed, embedFn, apiKey, cache, config, options) {
2337
2337
  const mutation = processed.mutation;
@@ -5598,6 +5598,47 @@ async function finalizeRecallResults(params) {
5598
5598
  return results;
5599
5599
  }
5600
5600
 
5601
+ // src/domain/recall/browse-selection.ts
5602
+ var DEFAULT_BROWSE_DIVERSITY_TARGET = 4;
5603
+ function normalizeEntryType(result) {
5604
+ const rawType = result.entry.type;
5605
+ return typeof rawType === "string" ? rawType.trim().toLowerCase() : "";
5606
+ }
5607
+ function diversifyBrowseResults(results, limit, diversityTarget = DEFAULT_BROWSE_DIVERSITY_TARGET) {
5608
+ const normalizedLimit = Math.max(0, Math.floor(limit));
5609
+ if (normalizedLimit === 0 || results.length === 0) {
5610
+ return [];
5611
+ }
5612
+ const normalizedDiversityTarget = Math.max(0, Math.floor(diversityTarget));
5613
+ const seedTarget = Math.min(normalizedLimit, normalizedDiversityTarget);
5614
+ const selected = [];
5615
+ const selectedIds = /* @__PURE__ */ new Set();
5616
+ const admittedTypes = /* @__PURE__ */ new Set();
5617
+ for (const result of results) {
5618
+ if (selected.length >= seedTarget) {
5619
+ break;
5620
+ }
5621
+ const normalizedType = normalizeEntryType(result);
5622
+ if (!normalizedType || admittedTypes.has(normalizedType)) {
5623
+ continue;
5624
+ }
5625
+ selected.push(result);
5626
+ selectedIds.add(result.entry.id);
5627
+ admittedTypes.add(normalizedType);
5628
+ }
5629
+ for (const result of results) {
5630
+ if (selected.length >= normalizedLimit) {
5631
+ break;
5632
+ }
5633
+ if (selectedIds.has(result.entry.id)) {
5634
+ continue;
5635
+ }
5636
+ selected.push(result);
5637
+ selectedIds.add(result.entry.id);
5638
+ }
5639
+ return selected;
5640
+ }
5641
+
5601
5642
  // src/domain/recall/lexical.ts
5602
5643
  var STOP_TOKENS = /* @__PURE__ */ new Set([
5603
5644
  "a",
@@ -5754,6 +5795,8 @@ var DEFAULT_RECALL_SATURATION = 10;
5754
5795
  var DEFAULT_WARM_START_THRESHOLD = 3;
5755
5796
  var DEFAULT_SYNTHETIC_FLOOR = 0.1;
5756
5797
  var DEFAULT_AGENT_SOURCE_BONUS = 0.05;
5798
+ var BROWSE_RECENCY_WEIGHT = 0.8;
5799
+ var BROWSE_IMPORTANCE_WEIGHT = 0.2;
5757
5800
  var MISSING_RECALL_DAYS = 99999;
5758
5801
  var EMPTY_RECALL_METRICS = {
5759
5802
  totalCount: 0,
@@ -5878,7 +5921,9 @@ function browseRecencyFactor(entry, now, aroundDate, aroundRadius = DEFAULT_AROU
5878
5921
  return resolveRecallRecency(entry, now, aroundDate, aroundRadius);
5879
5922
  }
5880
5923
  function scoreBrowseEntry(entry, now, aroundDate, aroundRadius = DEFAULT_AROUND_RADIUS_DAYS) {
5881
- return clamp01((browseRecencyFactor(entry, now, aroundDate, aroundRadius) + importanceScore(entry.importance)) / 2);
5924
+ const recency2 = browseRecencyFactor(entry, now, aroundDate, aroundRadius);
5925
+ const importance = importanceScore(entry.importance);
5926
+ return clamp01(recency2 * BROWSE_RECENCY_WEIGHT + importance * BROWSE_IMPORTANCE_WEIGHT);
5882
5927
  }
5883
5928
  function scoreSessionEntry(entry, effectiveNow, freshnessNow, metricsMap, aroundDate, aroundRadius = DEFAULT_AROUND_RADIUS_DAYS, config) {
5884
5929
  return scoreEntryWithBreakdown(
@@ -6261,8 +6306,21 @@ async function fetchBrowseCandidates(params) {
6261
6306
  whereClauses.push("platform = ?");
6262
6307
  args.push(params.query.platform);
6263
6308
  }
6264
- args.push(params.limit);
6265
6309
  const whereClause = whereClauses.length > 0 ? "WHERE " + whereClauses.join(" AND ") : "";
6310
+ const orderBy = params.aroundDate ? `
6311
+ ORDER BY
6312
+ ABS(julianday(created_at) - julianday(?)) ASC,
6313
+ importance DESC,
6314
+ created_at DESC
6315
+ ` : `
6316
+ ORDER BY
6317
+ created_at DESC,
6318
+ importance DESC
6319
+ `;
6320
+ if (params.aroundDate) {
6321
+ args.push(params.aroundDate.toISOString());
6322
+ }
6323
+ args.push(params.limit);
6266
6324
  const result = await params.db.execute({
6267
6325
  sql: `
6268
6326
  SELECT
@@ -6270,10 +6328,10 @@ async function fetchBrowseCandidates(params) {
6270
6328
  FROM entries
6271
6329
  ${whereClause}
6272
6330
  -- SQL pre-sort is a best-effort approximation only.
6273
- -- Final order is determined by scoreBrowseEntry() (importance * recency decay)
6274
- -- which re-sorts post-fetch. The over-fetch buffer (limit*3, min 50)
6331
+ -- Final order is determined by recency-first browse scoring and a post-score
6332
+ -- diversity pass. The over-fetch buffer (limit*3, min 50)
6275
6333
  -- ensures the final top-N are present in the candidate pool.
6276
- ORDER BY importance DESC, created_at DESC
6334
+ ${orderBy}
6277
6335
  LIMIT ?
6278
6336
  `,
6279
6337
  args
@@ -6499,6 +6557,7 @@ async function recallBrowseMode(params) {
6499
6557
  query: browseQuery,
6500
6558
  limit: browseLimit,
6501
6559
  now: params.now,
6560
+ aroundDate: params.prepared.temporal.aroundDate,
6502
6561
  projectFilter: params.prepared.scope.resolution.primaryFilter
6503
6562
  });
6504
6563
  const filtered = browseCandidates.filter(
@@ -6519,7 +6578,11 @@ async function recallBrowseMode(params) {
6519
6578
  aroundRadiusDays: params.prepared.temporal.aroundRadiusDays
6520
6579
  });
6521
6580
  scored.sort((left, right) => right.score - left.score);
6522
- return scored.slice(0, requestedLimit);
6581
+ const results = diversifyBrowseResults(scored, requestedLimit, DEFAULT_BROWSE_DIVERSITY_TARGET);
6582
+ if (params.trace) {
6583
+ params.trace.finalResultIds = results.map((result) => result.entry.subject ?? result.entry.id);
6584
+ }
6585
+ return results;
6523
6586
  }
6524
6587
  async function retrieveCandidates(params) {
6525
6588
  if (!params.prepared.retrievalSearchText) {
@@ -6924,11 +6987,12 @@ function resolvePreparedRecallScope(query, normalizedQuery) {
6924
6987
  const explicitWildcardProject = hasWildcardProjectOverride(query.project);
6925
6988
  const explicitProjects = explicitWildcardProject ? [] : parseProjectList(query.project);
6926
6989
  const explicitExcludedProjects = parseProjectList(query.excludeProject);
6927
- const inferredProjectHints = deriveRecallProjectScopeHints({
6990
+ const inferredProjectHints = explicitWildcardProject ? [] : deriveRecallProjectScopeHints({
6928
6991
  queryText: normalizedQuery?.searchText ?? query.text ?? "",
6929
6992
  explicitProjects
6930
6993
  });
6931
- const projectHints = explicitProjects.length > 0 ? explicitProjects : inferredProjectHints;
6994
+ const callerProjectHints = explicitProjects.length > 0 || explicitWildcardProject ? [] : parseProjectList(query.projectHints).filter((projectHint) => projectHint !== "*");
6995
+ const projectHints = explicitProjects.length > 0 ? explicitProjects : Array.from(/* @__PURE__ */ new Set([...inferredProjectHints, ...callerProjectHints]));
6932
6996
  const intent = projectHints.length > 0 ? "project" : normalizedQuery?.scopeIntent ?? "ambiguous";
6933
6997
  if (query.universalOnly === true) {
6934
6998
  return {
@@ -650,6 +650,195 @@ var CREATE_INDEX_STATEMENTS = [
650
650
  "CREATE UNIQUE INDEX IF NOT EXISTS idx_ingest_log_file_hash ON ingest_log(file_path, content_hash)"
651
651
  ];
652
652
 
653
+ // src/db/vector-index.ts
654
+ var ACTIVE_EMBEDDED_WHERE_SQL = `
655
+ embedding IS NOT NULL
656
+ AND retired = 0
657
+ AND superseded_by IS NULL
658
+ `;
659
+ var SELF_UPDATE_ACTIVE_EMBEDDINGS_SQL = `
660
+ UPDATE entries
661
+ SET embedding = embedding
662
+ WHERE ${ACTIVE_EMBEDDED_WHERE_SQL}
663
+ `;
664
+ var TEMP_REPAIR_TABLE = "_vec_repair";
665
+ var REPAIR_CHUNK_SIZE = 100;
666
+ function toNumber(value) {
667
+ if (typeof value === "number") {
668
+ return value;
669
+ }
670
+ if (typeof value === "bigint") {
671
+ return Number(value);
672
+ }
673
+ if (typeof value === "string" && value.trim()) {
674
+ return Number(value);
675
+ }
676
+ return Number.NaN;
677
+ }
678
+ function toFiniteCount(value) {
679
+ const numeric = toNumber(value);
680
+ if (!Number.isFinite(numeric)) {
681
+ return 0;
682
+ }
683
+ return Math.max(0, Math.trunc(numeric));
684
+ }
685
+ function normalizeRowid(value) {
686
+ const numeric = toNumber(value);
687
+ if (!Number.isFinite(numeric)) {
688
+ return null;
689
+ }
690
+ const rowid = Math.trunc(numeric);
691
+ return rowid > 0 ? rowid : null;
692
+ }
693
+ function formatShadowMismatch(stats) {
694
+ const details = [`active=${stats.embeddingCount}`, `shadow=${stats.shadowCount}`];
695
+ if (stats.missingRowids.length > 0) {
696
+ details.push(`missing=${stats.missingRowids.length}`);
697
+ }
698
+ return details.join(", ");
699
+ }
700
+ async function rollbackQuietly(db) {
701
+ try {
702
+ await db.execute("ROLLBACK");
703
+ } catch {
704
+ }
705
+ }
706
+ async function dropRepairTableQuietly(db) {
707
+ try {
708
+ await db.execute(`DROP TABLE IF EXISTS ${TEMP_REPAIR_TABLE}`);
709
+ } catch {
710
+ }
711
+ }
712
+ async function repairMissingRowids(db, missingRowids) {
713
+ if (missingRowids.length === 0) {
714
+ return;
715
+ }
716
+ await db.execute(`DROP TABLE IF EXISTS ${TEMP_REPAIR_TABLE}`);
717
+ await db.execute(`
718
+ CREATE TEMP TABLE ${TEMP_REPAIR_TABLE} (
719
+ rid INTEGER PRIMARY KEY,
720
+ emb BLOB NOT NULL
721
+ )
722
+ `);
723
+ try {
724
+ for (let index = 0; index < missingRowids.length; index += REPAIR_CHUNK_SIZE) {
725
+ const chunk = missingRowids.slice(index, index + REPAIR_CHUNK_SIZE);
726
+ if (chunk.length === 0) {
727
+ continue;
728
+ }
729
+ const placeholders = chunk.map(() => "?").join(", ");
730
+ await db.execute(`DELETE FROM ${TEMP_REPAIR_TABLE}`);
731
+ await db.execute({
732
+ sql: `
733
+ INSERT INTO ${TEMP_REPAIR_TABLE} (rid, emb)
734
+ SELECT rowid, embedding
735
+ FROM entries
736
+ WHERE rowid IN (${placeholders})
737
+ `,
738
+ args: chunk
739
+ });
740
+ await db.execute({
741
+ sql: `
742
+ UPDATE entries
743
+ SET embedding = NULL
744
+ WHERE rowid IN (${placeholders})
745
+ `,
746
+ args: chunk
747
+ });
748
+ await db.execute({
749
+ sql: `
750
+ UPDATE entries
751
+ SET embedding = (
752
+ SELECT emb
753
+ FROM ${TEMP_REPAIR_TABLE}
754
+ WHERE rid = entries.rowid
755
+ )
756
+ WHERE rowid IN (${placeholders})
757
+ `,
758
+ args: chunk
759
+ });
760
+ }
761
+ } finally {
762
+ await dropRepairTableQuietly(db);
763
+ }
764
+ }
765
+ async function getVectorIndexShadowStats(db) {
766
+ const countsResult = await db.execute(`
767
+ SELECT
768
+ (SELECT COUNT(*) FROM entries WHERE ${ACTIVE_EMBEDDED_WHERE_SQL}) AS embedding_count,
769
+ (SELECT COUNT(*) FROM idx_entries_embedding_shadow) AS shadow_count
770
+ `);
771
+ const countsRow = countsResult.rows[0];
772
+ const embeddingCount = toFiniteCount(countsRow?.embedding_count);
773
+ const shadowCount = toFiniteCount(countsRow?.shadow_count);
774
+ const missingResult = await db.execute(`
775
+ SELECT e.rowid AS rowid
776
+ FROM entries AS e
777
+ LEFT JOIN idx_entries_embedding_shadow AS s
778
+ ON s.index_key = e.rowid
779
+ WHERE e.embedding IS NOT NULL
780
+ AND e.retired = 0
781
+ AND e.superseded_by IS NULL
782
+ AND s.index_key IS NULL
783
+ ORDER BY e.rowid ASC
784
+ `);
785
+ const missingRowids = missingResult.rows.map((row) => normalizeRowid(row.rowid)).filter((rowid) => rowid !== null);
786
+ return {
787
+ embeddingCount,
788
+ shadowCount,
789
+ missingRowids
790
+ };
791
+ }
792
+ async function rebuildVectorIndex(db, options) {
793
+ const start = Date.now();
794
+ const onLog = options?.onLog ?? (() => void 0);
795
+ await db.execute("BEGIN IMMEDIATE");
796
+ try {
797
+ await db.execute("DROP INDEX IF EXISTS idx_entries_embedding");
798
+ await db.execute(CREATE_IDX_ENTRIES_EMBEDDING_SQL);
799
+ await db.execute(SELF_UPDATE_ACTIVE_EMBEDDINGS_SQL);
800
+ const beforeRepair = await getVectorIndexShadowStats(db);
801
+ let repairedCount = 0;
802
+ if (beforeRepair.missingRowids.length > 0) {
803
+ repairedCount = beforeRepair.missingRowids.length;
804
+ onLog(`[vector-index] repairing ${repairedCount} entries missing from shadow table`);
805
+ await repairMissingRowids(db, beforeRepair.missingRowids);
806
+ }
807
+ const afterRepair = await getVectorIndexShadowStats(db);
808
+ if (afterRepair.missingRowids.length > 0 || afterRepair.shadowCount !== afterRepair.embeddingCount) {
809
+ throw new Error(`Vector index shadow table desynced after rebuild (${formatShadowMismatch(afterRepair)})`);
810
+ }
811
+ if (afterRepair.embeddingCount > 0) {
812
+ const verify = await db.execute(`
813
+ SELECT count(*) AS count
814
+ FROM vector_top_k(
815
+ 'idx_entries_embedding',
816
+ (SELECT embedding FROM entries WHERE ${ACTIVE_EMBEDDED_WHERE_SQL} LIMIT 1),
817
+ 1
818
+ )
819
+ `);
820
+ const verifyCount = toFiniteCount(verify.rows[0]?.count);
821
+ if (verifyCount !== 1) {
822
+ throw new Error(`Vector index rebuild verification failed (expected 1, got ${verifyCount})`);
823
+ }
824
+ }
825
+ await db.execute("COMMIT");
826
+ const durationMs = Date.now() - start;
827
+ onLog(
828
+ `[vector-index] rebuilt for ${afterRepair.embeddingCount} entries (${afterRepair.shadowCount} shadow rows, ${repairedCount} repaired, ${durationMs}ms)`
829
+ );
830
+ return {
831
+ embeddingCount: afterRepair.embeddingCount,
832
+ shadowCount: afterRepair.shadowCount,
833
+ repairedCount,
834
+ durationMs
835
+ };
836
+ } catch (error) {
837
+ await rollbackQuietly(db);
838
+ throw error;
839
+ }
840
+ }
841
+
653
842
  // src/db/schema/fts.ts
654
843
  async function rebuildEntriesFts(db) {
655
844
  await db.execute("INSERT INTO entries_fts(entries_fts) VALUES('delete-all')");
@@ -696,31 +885,16 @@ async function rebuildFtsAndTriggers(db) {
696
885
  throw error;
697
886
  }
698
887
  }
699
- async function rebuildVectorIndex(db) {
888
+ async function rebuildVectorIndex2(db) {
700
889
  try {
701
890
  await db.execute("REINDEX idx_entries_embedding");
702
- return;
703
- } catch {
704
- }
705
- await db.execute("BEGIN IMMEDIATE");
706
- try {
707
- await db.execute("DROP INDEX IF EXISTS idx_entries_embedding");
708
- await db.execute(CREATE_IDX_ENTRIES_EMBEDDING_SQL);
709
- await db.execute(`
710
- UPDATE entries
711
- SET embedding = embedding
712
- WHERE embedding IS NOT NULL
713
- AND retired = 0
714
- AND superseded_by IS NULL
715
- `);
716
- await db.execute("COMMIT");
717
- } catch (fallbackError) {
718
- try {
719
- await db.execute("ROLLBACK");
720
- } catch {
891
+ const stats = await getVectorIndexShadowStats(db);
892
+ if (stats.shadowCount === stats.embeddingCount && stats.missingRowids.length === 0) {
893
+ return;
721
894
  }
722
- throw fallbackError;
895
+ } catch {
723
896
  }
897
+ await rebuildVectorIndex(db);
724
898
  }
725
899
  async function setBulkIngestMeta(db, phase) {
726
900
  await db.execute({
@@ -1278,15 +1452,10 @@ async function repairVectorIndexIfNeeded(client) {
1278
1452
  if (probeCount > 0) {
1279
1453
  return;
1280
1454
  }
1281
- await client.execute("DROP INDEX IF EXISTS idx_entries_embedding");
1282
- await client.execute(CREATE_IDX_ENTRIES_EMBEDDING_SQL);
1283
- await client.execute(`
1284
- UPDATE entries
1285
- SET embedding = embedding
1286
- WHERE embedding IS NOT NULL
1287
- AND retired = 0
1288
- AND superseded_by IS NULL
1289
- `);
1455
+ } catch {
1456
+ }
1457
+ try {
1458
+ await rebuildVectorIndex(client);
1290
1459
  } catch {
1291
1460
  }
1292
1461
  }
@@ -1487,7 +1656,7 @@ async function checkAndRecoverBulkIngest(client) {
1487
1656
  }
1488
1657
  }
1489
1658
  if (indexCount < 1) {
1490
- await rebuildVectorIndex(client);
1659
+ await rebuildVectorIndex2(client);
1491
1660
  }
1492
1661
  try {
1493
1662
  const integrityResult = await client.execute("PRAGMA integrity_check");
@@ -1639,10 +1808,11 @@ function closeDb(client) {
1639
1808
  export {
1640
1809
  APP_VERSION,
1641
1810
  createLogger,
1642
- CREATE_IDX_ENTRIES_EMBEDDING_SQL,
1811
+ getVectorIndexShadowStats,
1812
+ rebuildVectorIndex,
1643
1813
  dropFtsTriggersAndIndex,
1644
1814
  rebuildFtsAndTriggers,
1645
- rebuildVectorIndex,
1815
+ rebuildVectorIndex2,
1646
1816
  setBulkIngestMeta,
1647
1817
  clearBulkIngestMeta,
1648
1818
  resetDb,
@@ -11,7 +11,7 @@ import {
11
11
  resolveHigherExpiry,
12
12
  resolveHighestExpiry,
13
13
  runSimpleStream
14
- } from "./chunk-PLIY7PBU.js";
14
+ } from "./chunk-D3DYUJKW.js";
15
15
  import {
16
16
  deleteCoRecallEdgesForEntryIds
17
17
  } from "./chunk-BXN5MMRG.js";
@@ -20,9 +20,6 @@ import {
20
20
  insertEntry,
21
21
  recordEntrySupport
22
22
  } from "./chunk-UNB2GHB2.js";
23
- import {
24
- rebuildVectorIndex
25
- } from "./chunk-JQJNOQH5.js";
26
23
  import {
27
24
  KNOWLEDGE_TYPES,
28
25
  resolveConflictLog
@@ -30,7 +27,7 @@ import {
30
27
  import {
31
28
  isShutdownRequested,
32
29
  resolveDefaultAppRuntimeDeps
33
- } from "./chunk-BD3SRNYZ.js";
30
+ } from "./chunk-YFOFO2FC.js";
34
31
  import {
35
32
  isRecord
36
33
  } from "./chunk-IVDSYJNR.js";
@@ -42,8 +39,9 @@ import {
42
39
  } from "./chunk-QDW77NBA.js";
43
40
  import {
44
41
  createLogger,
42
+ rebuildVectorIndex,
45
43
  walCheckpoint
46
- } from "./chunk-4GIJ227W.js";
44
+ } from "./chunk-HMBONTF3.js";
47
45
  import {
48
46
  cosineSimilarity,
49
47
  mapBufferToVector
@@ -80,6 +78,7 @@ var FILTERABLE_TASKS = [
80
78
  ];
81
79
  var ALL_TASKS = [
82
80
  ...FILTERABLE_TASKS,
81
+ "vector-integrity",
83
82
  "snapshot"
84
83
  ];
85
84
  var CONFLICTS_LIMIT_DEFAULT = 25;
@@ -1377,6 +1376,9 @@ function buildMergeContext(cluster) {
1377
1376
  "Merge the provided related entries into one or more canonical entries.",
1378
1377
  "Only include information explicitly stated in the source entries. Do not infer or add details not present.",
1379
1378
  "Do not turn distinct structured identities, role distinctions, or changed durable values into one blended blob summary.",
1379
+ "If entries describe different people, owners, locations, or other entities - even when they share a common topic - do not merge them into one entry. Split them into separate outputs.",
1380
+ "For example, 'Mom has dogs X and Y in Texas' and 'Dad has dog Z in Colorado' must remain separate entries, not become 'Mom has dogs X, Y, and Z.'",
1381
+ "If a source entry combines information about multiple distinct entities, split it into per-entity outputs rather than using the summary as justification to merge everything together.",
1380
1382
  "current_state, prior_state, and state_transition anchors must remain distinct. Split them into separate outputs when needed.",
1381
1383
  "If the source entries share structured claim metadata (subject_key, claim_predicate, claim_object), preserve those fields exactly in the merged output.",
1382
1384
  "Do not invent new structured claim metadata. Only include structured claim fields when they are shared across the source entries being merged.",