akm-cli 0.9.0-beta.6 → 0.9.0-beta.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,113 @@
1
+ // This Source Code Form is subject to the terms of the Mozilla Public
2
+ // License, v. 2.0. If a copy of the MPL was not distributed with this
3
+ // file, You can obtain one at https://mozilla.org/MPL/2.0/.
4
+ /** Milliseconds in one day (24 × 60 × 60 × 1000); used to convert timestamp deltas into days. */
5
+ const DAY_MS = 86_400_000;
6
+ /**
7
+ * Importance multipliers by asset type. Higher = more worth maintaining. The table is
8
+ * frozen; the selector copies it, layers caller overrides on top, and uses 1.0 for any type not listed.
9
+ */
10
+ export const DEFAULT_IMPORTANCE_WEIGHTS = Object.freeze({
11
+ skill: 1.5,
12
+ agent: 1.5,
13
+ command: 1.3,
14
+ workflow: 1.3,
15
+ lesson: 1.2,
16
+ knowledge: 1.0,
17
+ script: 0.9,
18
+ memory: 0.7,
19
+ });
20
+ /** Default staleness gate (days): an asset is due when its most recent reflect/distill is older than this, or when it has neither. */
21
+ export const DEFAULT_DUE_DAYS = 30;
22
+ /** Default cap on how many due assets the selector returns in `selected` per run. */
23
+ export const DEFAULT_MAX_PER_RUN = 25;
24
+ /**
25
+ * Half-life (days) for the recency-of-use decay term: 0.5^(useAgeDays / 21) is 1.0 for an
26
+ * asset used right now and 0.5 after one half-life. The selector adds a constant 0.1 on top,
27
+ * so the multiplier runs from 1.1 down towards 0.1. Mirrors the validated prototype (21 days).
28
+ */
29
+ const RECENCY_HALFLIFE_DAYS = 21;
30
+ /** Lower bound (bytes) on the size fed to log10 in the cost denominator, keeping it well away from 0 for tiny files; also the fallback when a size is missing, zero, or negative. */
31
+ const SIZE_FLOOR_BYTES = 200;
32
+ /** Extract the asset type (the text before the first ":") from a `type:name` ref. Returns "" when there is no colon or the ref starts with one. */
33
+ function refType(ref) {
34
+ const i = ref.indexOf(":");
35
+ return i > 0 ? ref.slice(0, i) : ""; // NOTE(review): an origin-prefixed ref would keep its prefix in the result — confirm callers pass bare `type:name` refs
36
+ }
37
+ /**
38
+ * Score every candidate and return the highest-priority due assets for proactive maintenance.
39
+ * Reads from `params`: `candidates` (objects whose `ref` is a `type:name` string), the `lastReflectTs` / `lastDistillTs` / `retrievalCounts` lookups (each queried with `.get(ref)`), and the optional `lastUseMs`, `sizeBytesOf`, `now`, `dueDays`, `maxPerRun`, `importanceWeights`.
40
+ * Priority formula (mirrors the validated prototype; `ln` is the natural log, i.e. `Math.log`):
41
+ *
42
+ * priority = (importance × ln(1 + retrievalFreq) × (0.1 + 0.5^(useAgeDays/21)))
43
+ * / log10(max(size, 200))
44
+ * Returns `{ selected, dueTotal, neverReflected, scored }`: `selected` is at most `maxPerRun` candidate objects, best first; `scored` has one record per candidate, due or not.
45
+ * DUE gate: an asset is eligible only if it was never reflected or distilled, OR its most
46
+ * recent reflect/distill is more than `dueDays` ago. The same gate doubles as the
47
+ * ROTATION cooldown — a freshly-touched asset is excluded until it ages back
48
+ * past `dueDays`, so successive runs rotate through the due pool rather than
49
+ * re-selecting the same heads. Non-due assets never enter the selection.
50
+ */
51
+ export function selectProactiveMaintenanceRefs(params) {
52
+ const now = params.now ?? Date.now();
53
+ const dueDays = params.dueDays ?? DEFAULT_DUE_DAYS;
54
+ const maxPerRun = params.maxPerRun ?? DEFAULT_MAX_PER_RUN;
55
+ const weights = { ...DEFAULT_IMPORTANCE_WEIGHTS, ...(params.importanceWeights ?? {}) };
56
+ const scored = [];
57
+ for (const candidate of params.candidates) {
58
+ const ref = candidate.ref;
59
+ const type = refType(ref);
60
+ // Staleness is measured from the most recent of reflect/distill — either one touching
61
+ // the asset resets its maintenance clock. An unparseable timestamp counts as 0 (never touched).
62
+ const reflectIso = params.lastReflectTs.get(ref);
63
+ const distillIso = params.lastDistillTs.get(ref);
64
+ let lastTouchMs = 0;
65
+ if (reflectIso)
66
+ lastTouchMs = Math.max(lastTouchMs, Date.parse(reflectIso) || 0);
67
+ if (distillIso)
68
+ lastTouchMs = Math.max(lastTouchMs, Date.parse(distillIso) || 0);
69
+ const neverReflected = lastTouchMs === 0; // true when neither reflect nor distill yielded a usable timestamp
70
+ const staleDays = neverReflected ? Number.POSITIVE_INFINITY : (now - lastTouchMs) / DAY_MS;
71
+ // DUE / rotation gate: never touched, or strictly more than `dueDays` since the last touch.
72
+ const due = neverReflected || staleDays > dueDays;
73
+ // Retrieval frequency + recency decay. An asset with no recorded use is treated as 9999 days old, which leaves only the 0.1 floor.
74
+ const retrievalFreq = params.retrievalCounts.get(ref) ?? 0;
75
+ const lastUse = params.lastUseMs?.get(ref) ?? 0;
76
+ const useAgeDays = lastUse > 0 ? (now - lastUse) / DAY_MS : 9999;
77
+ const recencyDecay = 0.1 + 0.5 ** (useAgeDays / RECENCY_HALFLIFE_DAYS);
78
+ // Size proxy (cost): larger assets are slightly deprioritized, but only by
79
+ // log10 so a big-but-hot asset is never starved. A missing, zero, or negative size falls back to the floor.
80
+ let sizeBytes = params.sizeBytesOf?.(candidate) ?? 0;
81
+ if (!sizeBytes || sizeBytes < 0)
82
+ sizeBytes = SIZE_FLOOR_BYTES;
83
+ const sizeProxy = Math.max(SIZE_FLOOR_BYTES, sizeBytes); // a positive size below the floor is still recorded as-is in `sizeBytes`; only the denominator is clamped
84
+ const importance = weights[type] ?? 1.0;
85
+ const priority = (importance * Math.log(1 + retrievalFreq) * recencyDecay) / Math.log10(sizeProxy); // zero retrievals => ln(1) = 0 => priority 0
86
+ scored.push({
87
+ ref: candidate, // the whole candidate object, not the ref string
88
+ type,
89
+ staleDays,
90
+ neverReflected,
91
+ retrievalFreq,
92
+ recencyDecay,
93
+ sizeBytes,
94
+ importance,
95
+ priority,
96
+ due,
97
+ });
98
+ }
99
+ const dueScored = scored.filter((s) => s.due);
100
+ const dueTotal = dueScored.length;
101
+ const neverReflected = dueScored.filter((s) => s.neverReflected).length; // count of due assets with no usable reflect/distill timestamp
102
+ // Rank due assets by composite priority (desc). Ties are broken by staleness
103
+ // (older first), then by ref string for deterministic ordering.
104
+ const ranked = dueScored.slice().sort((a, b) => {
105
+ if (b.priority !== a.priority)
106
+ return b.priority - a.priority;
107
+ if (b.staleDays !== a.staleDays) // the inequality guard also avoids Infinity - Infinity (NaN) when both were never touched
108
+ return b.staleDays - a.staleDays;
109
+ return a.ref.ref < b.ref.ref ? -1 : a.ref.ref > b.ref.ref ? 1 : 0; // `.ref.ref`: scored record -> candidate -> ref string
110
+ });
111
+ const selected = ranked.slice(0, Math.max(0, maxPerRun)).map((s) => s.ref); // a negative maxPerRun selects nothing
112
+ return { selected, dueTotal, neverReflected, scored };
113
+ }
@@ -585,6 +585,9 @@ export async function akmReflect(options = {}) {
585
585
  metadata: {
586
586
  ...(options.task ? { task: options.task } : {}),
587
587
  ...(options.profile ? { profile: options.profile } : {}),
588
+ // Attribution tagging: stamp the eligibility lane so reflect_invoked can be
589
+ // sliced by lane downstream. See EligibilitySource.
590
+ ...(options.eligibilitySource ? { eligibilitySource: options.eligibilitySource } : {}),
588
591
  },
589
592
  });
590
593
  // Fix #3 (observability 0.8.0): every failure path below MUST emit
@@ -1228,6 +1231,9 @@ export async function akmReflect(options = {}) {
1228
1231
  // `parseAgentProposalPayload` already clamps to [0, 1] and drops non-
1229
1232
  // finite values; `createProposal` runs its own sanitizer as a safety net.
1230
1233
  ...(typeof payload.confidence === "number" ? { confidence: payload.confidence } : {}),
1234
+ // Attribution tagging: persist the eligibility lane on the proposal so it
1235
+ // survives to accept/reject/revert time even across runs. See EligibilitySource.
1236
+ ...(options.eligibilitySource ? { eligibilitySource: options.eligibilitySource } : {}),
1231
1237
  };
1232
1238
  const proposalResult = createProposal(stash, createInput, options.ctx);
1233
1239
  if (isProposalSkipped(proposalResult)) {
@@ -75,6 +75,11 @@ export async function akmProposalAccept(options) {
75
75
  source: result.proposal.source,
76
76
  ...(result.proposal.sourceRun !== undefined ? { sourceRun: result.proposal.sourceRun } : {}),
77
77
  assetPath: result.assetPath,
78
+ // Attribution tagging: carry the eligibility lane from the proposal record
79
+ // onto the promoted event so accept outcomes can be sliced by lane.
80
+ ...(result.proposal.eligibilitySource !== undefined
81
+ ? { eligibilitySource: result.proposal.eligibilitySource }
82
+ : {}),
78
83
  },
79
84
  });
80
85
  return {
@@ -50,7 +50,7 @@ import { makeAssetRef, parseAssetRef } from "../../../core/asset/asset-ref.js";
50
50
  import { resolveAssetPathFromName, TYPE_DIRS } from "../../../core/asset/asset-spec.js";
51
51
  import { NotFoundError, UsageError } from "../../../core/errors.js";
52
52
  import { appendEvent } from "../../../core/events.js";
53
- import { getStateDbPath, getStateProposal, hasImportedFsProposals, insertProposalIfAbsent, listStateProposalIdsByPrefix, listStateProposals, openStateDatabase, recordFsProposalsImport, upsertProposal, } from "../../../core/state-db.js";
53
+ import { getStateDbPath, getStateProposal, hasImportedFsProposals, insertProposalIfAbsent, listStateProposalIdsByPrefix, listStateProposals, openStateDatabase, recordFsProposalsImport, upsertProposal, withImmediateTransaction, } from "../../../core/state-db.js";
54
54
  import { warn } from "../../../core/warn.js";
55
55
  import { commitWriteTargetBoundary, formatRefForMessage, resolveWriteTarget, writeAssetToSource, } from "../../../core/write-source.js";
56
56
  import { runProposalValidators } from "./proposal-validators.js";
@@ -334,37 +334,42 @@ export function createProposal(stashDir, input, ctx) {
334
334
  }
335
335
  const normalizedRef = makeAssetRef(parsedRef.type, parsedRef.name, parsedRef.origin);
336
336
  return withProposalsDb(stashDir, ctx, (db) => {
337
- if (!input.force) {
338
- const skip = checkDedupAndCooldown(db, stashDir, normalizedRef, input, ctx);
339
- if (skip)
340
- return skip;
341
- }
342
- const created = nowIso(ctx);
343
- // Phase 6A: validate confidence is a finite number in [0, 1]. Anything else
344
- // is dropped silently we never store NaN, Infinity, or out-of-range values.
345
- // Callers that mis-report confidence should not poison the auto-accept gate.
346
- const sanitizedConfidence = typeof input.confidence === "number" &&
347
- Number.isFinite(input.confidence) &&
348
- input.confidence >= 0 &&
349
- input.confidence <= 1
350
- ? input.confidence
351
- : undefined;
352
- const proposal = {
353
- id: newId(ctx),
354
- ref: normalizedRef,
355
- status: "pending",
356
- source: input.source,
357
- ...(input.sourceRun !== undefined ? { sourceRun: input.sourceRun } : {}),
358
- createdAt: created,
359
- updatedAt: created,
360
- payload: {
361
- content: input.payload.content,
362
- ...(input.payload.frontmatter !== undefined ? { frontmatter: input.payload.frontmatter } : {}),
363
- },
364
- ...(sanitizedConfidence !== undefined ? { confidence: sanitizedConfidence } : {}),
365
- };
366
- upsertProposal(db, proposal, stashDir);
367
- return proposal;
337
+ return withImmediateTransaction(db, () => {
338
+ if (!input.force) {
339
+ const skip = checkDedupAndCooldown(db, stashDir, normalizedRef, input, ctx);
340
+ if (skip)
341
+ return skip;
342
+ }
343
+ const created = nowIso(ctx);
344
+ // Phase 6A: validate confidence is a finite number in [0, 1]. Anything else
345
+ // is dropped silently — we never store NaN, Infinity, or out-of-range values.
346
+ // Callers that mis-report confidence should not poison the auto-accept gate.
347
+ const sanitizedConfidence = typeof input.confidence === "number" &&
348
+ Number.isFinite(input.confidence) &&
349
+ input.confidence >= 0 &&
350
+ input.confidence <= 1
351
+ ? input.confidence
352
+ : undefined;
353
+ const proposal = {
354
+ id: newId(ctx),
355
+ ref: normalizedRef,
356
+ status: "pending",
357
+ source: input.source,
358
+ ...(input.sourceRun !== undefined ? { sourceRun: input.sourceRun } : {}),
359
+ createdAt: created,
360
+ updatedAt: created,
361
+ payload: {
362
+ content: input.payload.content,
363
+ ...(input.payload.frontmatter !== undefined ? { frontmatter: input.payload.frontmatter } : {}),
364
+ },
365
+ ...(sanitizedConfidence !== undefined ? { confidence: sanitizedConfidence } : {}),
366
+ // Attribution tagging: persist the eligibility lane so it survives to
367
+ // accept/reject/revert time. See EligibilitySource.
368
+ ...(input.eligibilitySource !== undefined ? { eligibilitySource: input.eligibilitySource } : {}),
369
+ };
370
+ upsertProposal(db, proposal, stashDir);
371
+ return proposal;
372
+ });
368
373
  });
369
374
  }
370
375
  /**
@@ -517,19 +522,25 @@ export function resolveProposalId(stashDir, idOrRef, ctx) {
517
522
  */
518
523
  export function archiveProposal(stashDir, id, status, reason, ctx) {
519
524
  return withProposalsDb(stashDir, ctx, (db) => {
520
- const existing = requireProposal(db, stashDir, id);
521
- const updated = {
522
- ...existing,
523
- status,
524
- updatedAt: nowIso(ctx),
525
- review: {
526
- outcome: status,
527
- ...(reason !== undefined ? { reason } : {}),
528
- decidedAt: nowIso(ctx),
529
- },
530
- };
531
- upsertProposal(db, updated, stashDir);
532
- return updated;
525
+ return withImmediateTransaction(db, () => {
526
+ const existing = requireProposal(db, stashDir, id);
527
+ if (existing.status !== "pending") {
528
+ throw new UsageError(`Proposal ${id} is not pending (current status: ${existing.status}). Only pending proposals can be ${status}.`, "INVALID_FLAG_VALUE");
529
+ }
530
+ const decidedAt = nowIso(ctx);
531
+ const updated = {
532
+ ...existing,
533
+ status,
534
+ updatedAt: decidedAt,
535
+ review: {
536
+ outcome: status,
537
+ ...(reason !== undefined ? { reason } : {}),
538
+ decidedAt,
539
+ },
540
+ };
541
+ upsertProposal(db, updated, stashDir);
542
+ return updated;
543
+ });
533
544
  });
534
545
  }
535
546
  /**
@@ -548,15 +559,17 @@ export function archiveProposal(stashDir, id, status, reason, ctx) {
548
559
  */
549
560
  export function recordGateDecision(stashDir, id, decision, ctx) {
550
561
  return withProposalsDb(stashDir, ctx, (db) => {
551
- const existing = getStateProposal(db, id, stashDir);
552
- if (!existing)
553
- return undefined;
554
- const updated = {
555
- ...existing,
556
- gateDecision: { ...decision, decidedAt: decision.decidedAt ?? nowIso(ctx) },
557
- };
558
- upsertProposal(db, updated, stashDir);
559
- return updated;
562
+ return withImmediateTransaction(db, () => {
563
+ const existing = getStateProposal(db, id, stashDir);
564
+ if (!existing || existing.status !== "pending")
565
+ return undefined;
566
+ const updated = {
567
+ ...existing,
568
+ gateDecision: { ...decision, decidedAt: decision.decidedAt ?? nowIso(ctx) },
569
+ };
570
+ upsertProposal(db, updated, stashDir);
571
+ return updated;
572
+ });
560
573
  });
561
574
  }
562
575
  /**
@@ -54,6 +54,9 @@ function logCurateEvent(query, result) {
54
54
  try {
55
55
  const db = openExistingDatabase();
56
56
  try {
57
+ // Summary row (entry_ref = NULL): preserves the query → itemRefs audit
58
+ // trail. Retrieval counting ignores NULL-ref rows, so this row is purely
59
+ // informational.
57
60
  insertUsageEvent(db, {
58
61
  event_type: "curate",
59
62
  query,
@@ -63,6 +66,20 @@ function logCurateEvent(query, result) {
63
66
  }),
64
67
  source: "user",
65
68
  });
69
+ // Per-item rows with entry_ref populated so curation registers as a real
70
+ // retrieval signal in getRetrievalCounts (which counts 'curate' events).
71
+ // Only stash items expose a canonical asset ref; registry hits
72
+ // (`registry:<id>`) have no asset ref and are skipped here.
73
+ for (const item of result.items) {
74
+ if (!("ref" in item) || typeof item.ref !== "string")
75
+ continue;
76
+ insertUsageEvent(db, {
77
+ event_type: "curate",
78
+ query,
79
+ entry_ref: item.ref,
80
+ source: "user",
81
+ });
82
+ }
66
83
  }
67
84
  finally {
68
85
  closeDatabase(db);
@@ -71,6 +71,11 @@ export const indexCommand = defineCommand({
71
71
  description: "When combined with --clean, report stale entries without deleting them.",
72
72
  default: false,
73
73
  },
74
+ background: {
75
+ type: "boolean",
76
+ description: "Run as a background process (suppresses interactive output, manages PID file).",
77
+ default: false,
78
+ },
74
79
  },
75
80
  async run({ args }) {
76
81
  await runWithJsonErrors(async () => {
@@ -80,6 +85,7 @@ export const indexCommand = defineCommand({
80
85
  if (getHyphenatedBoolean(args, "re-enrich") || parseFlagValue(process.argv, "--re-enrich") !== undefined) {
81
86
  throw new UsageError("`akm index --re-enrich` has been removed. Re-enrichment of index-time LLM passes is not exposed in this slice.");
82
87
  }
88
+ const isBackground = args.background === true;
83
89
  const outputMode = getOutputMode();
84
90
  const controller = new AbortController();
85
91
  const abort = () => controller.abort(new Error("index interrupted"));
@@ -88,7 +94,7 @@ export const indexCommand = defineCommand({
88
94
  const indexLogFile = path.join(getCacheDir(), "logs", "index", `${new Date().toISOString().replace(/[:.]/g, "-")}.log`);
89
95
  setLogFile(indexLogFile);
90
96
  const verbose = isVerbose();
91
- const spin = !verbose && outputMode.format === "text" ? p.spinner() : null;
97
+ const spin = !verbose && !isBackground && outputMode.format === "text" ? p.spinner() : null;
92
98
  if (spin) {
93
99
  spin.start(`Building search index${args.full ? " (full rebuild)" : ""}...`);
94
100
  }
@@ -114,7 +120,9 @@ export const indexCommand = defineCommand({
114
120
  if (spin) {
115
121
  spin.stop(`Indexed ${result.totalEntries} assets.`);
116
122
  }
117
- output("index", result);
123
+ if (!isBackground) {
124
+ output("index", result);
125
+ }
118
126
  }
119
127
  catch (error) {
120
128
  if (spin) {
@@ -156,6 +156,15 @@ export const ImproveProcessConfigSchema = z
156
156
  neighborsPerChanged: z.number().int().min(1).optional(),
157
157
  // Distill process: skip distill entirely when reflect produced zero planned refs.
158
158
  requirePlannedRefs: z.boolean().optional(),
159
+ // proactiveMaintenance process (Layer 2): staleness gate + rotation cooldown
160
+ // in days (default 30). Only meaningful on `proactiveMaintenance`.
161
+ dueDays: z.number().int().min(0).optional(),
162
+ // proactiveMaintenance process: top-N bound per run (default 25). Alias for
163
+ // `limit`; `maxPerRun` wins when both are set.
164
+ maxPerRun: positiveInt.optional(),
165
+ // proactiveMaintenance process: optional per-type importance overrides,
166
+ // merged over the built-in defaults. Only meaningful on `proactiveMaintenance`.
167
+ importanceWeights: z.record(z.string().min(1), z.number()).optional(),
159
168
  // MemoryInference process: minimum pending memory count to run the pass.
160
169
  minPendingCount: z.number().int().min(0).optional(),
161
170
  // Extract process: minimum number of new (unseen, in-window) candidate
@@ -195,6 +204,7 @@ const ImproveProfileProcessesSchema = z
195
204
  graphExtraction: ImproveProcessConfigSchema.optional(),
196
205
  validation: ImproveProcessConfigSchema.optional(),
197
206
  triage: ImproveProcessConfigSchema.optional(),
207
+ proactiveMaintenance: ImproveProcessConfigSchema.optional(),
198
208
  })
199
209
  .passthrough()
200
210
  .superRefine((val, ctx) => {
@@ -218,6 +228,7 @@ const ImproveProfileProcessesSchema = z
218
228
  "validation",
219
229
  "extract",
220
230
  "triage",
231
+ "proactiveMaintenance",
221
232
  ]);
222
233
  for (const k of Object.keys(raw)) {
223
234
  if (!allowed.has(k)) {
@@ -215,6 +215,9 @@ export function getDataDir(env = process.env, platform = process.platform) {
215
215
  export function getDbPath() {
216
216
  return path.join(getDataDir(), "index.db");
217
217
  }
218
+ export function getIndexWriterLockPath() { // NOTE(review): presumably the lock file for index.db writers — the locking code is not visible in this hunk
219
+ return path.join(getDataDir(), "index.db.write.lock"); // same data directory as index.db (see getDbPath), with a ".write.lock" suffix
220
+ }
218
221
  export function getWorkflowDbPath() {
219
222
  return path.join(getDataDir(), "workflow.db");
220
223
  }
@@ -193,7 +193,7 @@ const MIGRATIONS = [
193
193
  -- metadata_json TEXT — JSON object for future proposal fields.
194
194
  -- Current fields stored here: sourceRun,
195
195
  -- review, confidence, gateDecision (#577),
196
- -- backupContent.
196
+ -- backupContent, eligibilitySource.
197
197
  --
198
198
  -- ADD COLUMN extension points (future migrations):
199
199
  -- ALTER TABLE proposals ADD COLUMN source_run TEXT DEFAULT NULL;
@@ -488,6 +488,20 @@ const MIGRATIONS = [
488
488
  );
489
489
  `,
490
490
  },
491
+ // ── Migration 006 — pending proposal lookup index ──────────────────────────
492
+ //
493
+ // Supports the transaction-scoped dedup / queue-mutation hardening added in
494
+ // 0.9.x. The queue now acquires an IMMEDIATE write transaction before it
495
+ // reads pending proposals, so the hot path is a stash-scoped `status='pending'
496
+ // AND ref=?` probe followed by an update/insert. This composite index keeps
497
+ // that lookup index-covered under contention.
498
+ {
499
+ id: "006-proposals-pending-ref-source",
500
+ up: `
501
+ CREATE INDEX IF NOT EXISTS idx_proposals_stash_status_ref_source
502
+ ON proposals(stash_dir, status, ref, source);
503
+ `,
504
+ },
491
505
  ];
492
506
  /**
493
507
  * Apply every pending migration in a single transaction per migration.
@@ -562,6 +576,9 @@ export function proposalRowToProposal(row) {
562
576
  ...(typeof meta.confidence === "number" ? { confidence: meta.confidence } : {}),
563
577
  ...(meta.gateDecision !== undefined ? { gateDecision: meta.gateDecision } : {}),
564
578
  ...(typeof meta.backupContent === "string" ? { backupContent: meta.backupContent } : {}),
579
+ ...(typeof meta.eligibilitySource === "string"
580
+ ? { eligibilitySource: meta.eligibilitySource }
581
+ : {}),
565
582
  };
566
583
  }
567
584
  /**
@@ -581,6 +598,8 @@ export function proposalToRowValues(proposal, stashDir) {
581
598
  metaObj.gateDecision = proposal.gateDecision;
582
599
  if (proposal.backupContent !== undefined)
583
600
  metaObj.backupContent = proposal.backupContent;
601
+ if (proposal.eligibilitySource !== undefined)
602
+ metaObj.eligibilitySource = proposal.eligibilitySource;
584
603
  return {
585
604
  id: proposal.id,
586
605
  stash_dir: stashDir,
@@ -785,6 +804,32 @@ export function insertProposalIfAbsent(db, proposal, stashDir) {
785
804
  const changes = result.changes ?? 0;
786
805
  return Number(changes) > 0;
787
806
  }
807
+ /**
808
+ * Run `fn` inside a `BEGIN IMMEDIATE` transaction and return its result once `COMMIT` succeeds.
809
+ * If `fn` or the `COMMIT` throws, a `ROLLBACK` is attempted and the original error is rethrown. `fn` is called synchronously and its return value is not awaited.
810
+ * `db.transaction()` is DEFERRED by default on both Bun and better-sqlite3,
811
+ * which means two writers can both perform stale preflight reads and only race
812
+ * when they finally attempt the write. Proposal creation and queue mutation
813
+ * need the write lock BEFORE those reads so concurrent processes serialize on
814
+ * the live queue state rather than clobbering each other.
815
+ */
816
+ export function withImmediateTransaction(db, fn) {
817
+ db.exec("BEGIN IMMEDIATE"); // outside the try: if BEGIN itself fails there is nothing to roll back
818
+ try {
819
+ const result = fn();
820
+ db.exec("COMMIT");
821
+ return result;
822
+ }
823
+ catch (err) {
824
+ try {
825
+ db.exec("ROLLBACK");
826
+ }
827
+ catch {
828
+ // Ignore rollback failures so the original error (from `fn` or COMMIT) is the one rethrown.
829
+ }
830
+ throw err;
831
+ }
832
+ }
788
833
  // ── task_history table helpers ───────────────────────────────────────────────
789
834
  /**
790
835
  * Upsert a task history row.
@@ -1262,6 +1262,19 @@ export function getEntryIdByFilePath(db, filePath) {
1262
1262
  const row = db.prepare("SELECT id FROM entries WHERE file_path = ? LIMIT 1").get(filePath);
1263
1263
  return row?.id;
1264
1264
  }
1265
+ /**
1266
+ * Return a `Set` of every non-NULL, non-empty `entries.file_path` currently indexed (the query
1267
+ * applies no stash/source filter). Used by staleness detection to spot files that exist on
1268
+ * disk but were never indexed — a clock-independent signal for newly-added
1269
+ * assets that an mtime-vs-builtAt comparison can miss when the two clocks
1270
+ * (filesystem vs wall-clock) are skewed or land within the same millisecond.
1271
+ */
1272
+ export function getIndexedFilePaths(db) {
1273
+ const rows = db
1274
+ .prepare("SELECT DISTINCT file_path FROM entries WHERE file_path IS NOT NULL AND file_path <> ''")
1275
+ .all();
1276
+ return new Set(rows.map((r) => r.file_path));
1277
+ }
1265
1278
  /**
1266
1279
  * Resolve a single `entries.file_path` by primary key, or `undefined` if no
1267
1280
  * row matches.
@@ -1477,25 +1490,98 @@ export function computeBodyHash(body) {
1477
1490
  return sha256Hex(body);
1478
1491
  }
1479
1492
  /**
1480
- * Count search and show events for the given entry refs.
1481
- * Returns a Map<ref, count> with only refs that have at least one event.
1482
- * Used by the improve loop to find high-retrieval assets without feedback.
1493
+ * Reduce a ref to its bare `type:name` form, dropping any `origin//` prefix.
1494
+ *
1495
+ * usage_events store entry_ref inconsistently: search/show writers persist
1496
+ * whatever ref the result carried, which is sometimes stash-prefixed
1497
+ * (`origin//type:name`) and sometimes bare (`type:name`). Retrieval counting
1498
+ * keys on the bare form so both spellings of the same asset collapse together.
1499
+ *
1500
+ * Returns the bare form, or the original string when it cannot be parsed (best
1501
+ * effort — never throws so a malformed stored ref can't break counting).
1502
+ */
1503
+ function bareRef(ref) {
1504
+ try {
1505
+ const parsed = parseAssetRef(ref);
1506
+ return `${parsed.type}:${parsed.name}`; // rebuilt from type and name only, so nothing else parsed out of the ref is carried over
1507
+ }
1508
+ catch {
1509
+ return ref; // best effort: if parsing throws, hand back the input string unchanged
1510
+ }
1511
+ }
1512
+ /**
1513
+ * Count retrieval events for the given entry refs.
1514
+ *
1515
+ * Counts `search`, `show`, and `curate` usage events. Returns a
1516
+ * Map<inputRef, count> keyed by the *input* ref strings (only those with at
1517
+ * least one matching event appear). Used by the improve loop to find
1518
+ * high-retrieval assets without feedback.
1519
+ *
1520
+ * Matching is normalization-aware: each stored `entry_ref` is reduced to its
1521
+ * bare `type:name` form before comparison, so a stash-prefixed stored ref
1522
+ * (`origin//type:name`) still matches a bare input ref (`type:name`) and vice
1523
+ * versa. Previously the raw `entry_ref IN (...)` comparison silently dropped
1524
+ * roughly half the signal whenever the two spellings disagreed.
1525
+ *
1526
+ * `curate` events are included: their per-item rows are written with
1527
+ * entry_ref populated (see logCurateEvent), so curation is a real retrieval
1528
+ * signal here. Legacy summary-only curate rows with a NULL entry_ref simply
1529
+ * contribute nothing.
1483
1530
  */
1484
1531
  export function getRetrievalCounts(db, refs) {
1485
1532
  if (refs.length === 0)
1486
1533
  return new Map();
1487
- const result = new Map();
1534
+ // Map each distinct bare form back to the input ref(s) that produced it so we
1535
+ // can re-key DB results (grouped by bare form) onto the caller's ref strings.
1536
+ const bareToInputs = new Map();
1537
+ for (const ref of refs) {
1538
+ const bare = bareRef(ref);
1539
+ const existing = bareToInputs.get(bare);
1540
+ if (existing)
1541
+ existing.push(ref);
1542
+ else
1543
+ bareToInputs.set(bare, [ref]);
1544
+ }
1545
+ const bareForms = [...bareToInputs.keys()];
1546
+ // Accumulate counts per bare form across chunks before re-keying.
1547
+ const countsByBare = new Map();
1488
1548
  // Chunk to stay within SQLITE_MAX_VARIABLE_NUMBER (same pattern as getUtilityScoresByIds).
1489
- for (let i = 0; i < refs.length; i += SQLITE_CHUNK_SIZE) {
1490
- const chunk = refs.slice(i, i + SQLITE_CHUNK_SIZE);
1549
+ for (let i = 0; i < bareForms.length; i += SQLITE_CHUNK_SIZE) {
1550
+ const chunk = bareForms.slice(i, i + SQLITE_CHUNK_SIZE);
1491
1551
  const placeholders = chunk.map(() => "?").join(", ");
1552
+ // Normalize the stored entry_ref to its bare form inside SQL by stripping
1553
+ // everything up to and including the `//` separator. instr() locates the
1554
+ // first `//` (SQLite has no rfind); stored origins never themselves contain
1555
+ // `//`, so a stash ref has exactly one and the substr() cut is exact. Bare refs
1556
+ // have no `//` and pass through unchanged.
1492
1557
  const rows = db
1493
- .prepare(`SELECT entry_ref, COUNT(*) AS cnt FROM usage_events
1494
- WHERE event_type IN ('search','show') AND entry_ref IN (${placeholders})
1495
- GROUP BY entry_ref`)
1558
+ .prepare(`SELECT
1559
+ CASE
1560
+ WHEN instr(entry_ref, '//') > 0
1561
+ THEN substr(entry_ref, instr(entry_ref, '//') + 2)
1562
+ ELSE entry_ref
1563
+ END AS bare_ref,
1564
+ COUNT(*) AS cnt
1565
+ FROM usage_events
1566
+ WHERE event_type IN ('search','show','curate')
1567
+ AND entry_ref IS NOT NULL
1568
+ AND CASE
1569
+ WHEN instr(entry_ref, '//') > 0
1570
+ THEN substr(entry_ref, instr(entry_ref, '//') + 2)
1571
+ ELSE entry_ref
1572
+ END IN (${placeholders})
1573
+ GROUP BY bare_ref`)
1496
1574
  .all(...chunk);
1497
- for (const r of rows)
1498
- result.set(r.entry_ref, r.cnt);
1575
+ for (const r of rows) {
1576
+ countsByBare.set(r.bare_ref, (countsByBare.get(r.bare_ref) ?? 0) + r.cnt);
1577
+ }
1578
+ }
1579
+ // Re-key bare-form counts onto every input ref that maps to that bare form.
1580
+ const result = new Map();
1581
+ for (const [bare, count] of countsByBare) {
1582
+ for (const input of bareToInputs.get(bare) ?? []) {
1583
+ result.set(input, count);
1584
+ }
1499
1585
  }
1500
1586
  return result;
1501
1587
  }