@gethmy/mcp 2.8.4 → 2.8.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -68,14 +68,18 @@ export function resolveAgentIdentity(info: ClientInfo | null): {
68
68
  /**
69
69
  * Tools that trigger auto-start of a session.
70
70
  *
71
- * Restricted to tools that signal real work on a card. Board-management ops
72
- * (move, label add/remove) are excluded — they're routinely used for triage
73
- * and would create false-positive sessions whose side effect (the auto-added
74
- * `agent` label on the card) confuses both UI and humans.
71
+ * Restricted to tools that signal real work on a card. Triage/board-management
72
+ * ops are excluded — they're routinely used for sorting and card creation, not
73
+ * implementation, and would create false-positive sessions whose side effect
74
+ * (the auto-added `agent` label on the card) confuses both UI and humans.
75
+ *
76
+ * `harmony_update_card` is deliberately NOT a trigger: editing a card's
77
+ * title/description/priority is metadata editing (used during `/hmy` create and
78
+ * triage), not work. Including it spawned phantom sessions on freshly-created
79
+ * cards (card #295), the same reason move/label ops are excluded.
75
80
  */
76
81
  export const AUTO_START_TRIGGERS = new Set([
77
82
  "harmony_generate_prompt",
78
- "harmony_update_card",
79
83
  "harmony_create_subtask",
80
84
  "harmony_toggle_subtask",
81
85
  "harmony_update_subtask",
@@ -134,6 +138,15 @@ export async function trackActivity(
134
138
  const client = options?.client ?? clientGetter?.();
135
139
  if (!client) return;
136
140
 
141
+ // Resolve agent identity from the MCP `initialize` handshake. Never auto-start
142
+ // an anonymous session: if we can't say WHO is working, we don't fabricate a
143
+ // phantom "Unknown Agent" session (card #295). Identified clients only — this
144
+ // bail happens BEFORE ending other sessions so an unidentified call can't tear
145
+ // down a legitimate tracked session.
146
+ const info = clientInfoGetter?.() ?? null;
147
+ if (!info?.name) return;
148
+ const { agentIdentifier, agentName } = resolveAgentIdentity(info);
149
+
137
150
  // Collect auto-sessions on other cards to end (avoid mutating map during iteration)
138
151
  const toEnd: string[] = [];
139
152
  for (const [otherCardId, session] of activeSessions) {
@@ -145,10 +158,6 @@ export async function trackActivity(
145
158
  await autoEndSession(client, otherCardId, "completed");
146
159
  }
147
160
 
148
- // Resolve agent identity from MCP client info
149
- const info = clientInfoGetter?.() ?? null;
150
- const { agentIdentifier, agentName } = resolveAgentIdentity(info);
151
-
152
161
  // Start a new auto-session
153
162
  try {
154
163
  await client.startAgentSession(cardId, {
@@ -138,6 +138,159 @@ export async function findSimilarEntities(
138
138
  }
139
139
  }
140
140
 
141
+ // ============ WRITE-TIME SEMANTIC DEDUP (card #275) ============
142
+
143
/**
 * Minimum fused RRF score a hybrid-search hit needs before it is considered a
 * *supersede candidate* during a write.
 *
 * The hybrid_search RPC combines FTS and vector ranks with Reciprocal Rank
 * Fusion: score = 1/(k+fts_rank) + 1/(k+semantic_rank), with k=50. Being #1 in
 * both lists therefore yields roughly 2/51 ≈ 0.039, while being #1 in only one
 * list yields ≈ 0.0196.
 *
 * An RRF rank is not a cosine similarity, so on its own this floor is a weak
 * signal. It is always combined with the lexical title-overlap guard
 * (SUPERSEDE_TITLE_OVERLAP) so only genuinely near-duplicate titles surface.
 * The value is intentionally conservative: dedup must never emit false
 * "this already exists" noise.
 *
 * Tuning note: linkCrossTypeNeighbors (the cross-type causal linker) uses
 * minRrfScore 0.04 as its "strongly related" bar. This floor sits below that
 * because dedup probes a single type and is after the top fused hit.
 */
export const SUPERSEDE_RRF_THRESHOLD = 0.029;

/**
 * Minimum Jaccard overlap between the significant title tokens of two
 * memories, required on top of the RRF floor, for a hit to count as a
 * supersede candidate.
 *
 * Protects against semantic-only matches — e.g. two distinct patterns that
 * both talk about "BoardContext" — being reported as duplicates. A value of
 * 0.5 means at least half of the significant tokens are shared.
 */
export const SUPERSEDE_TITLE_OVERLAP = 0.5;
167
+
168
/** Short English function words that carry no meaning for title comparison. */
const TITLE_STOPWORDS = new Set([
  "a",
  "an",
  "the",
  "and",
  "or",
  "of",
  "to",
  "in",
  "on",
  "for",
  "with",
  "is",
  "are",
  "be",
  "by",
  "at",
  "as",
]);

/**
 * Split a title into its set of significant, lower-cased tokens.
 *
 * Steps: NFC-normalise (so composed and decomposed accents compare equal),
 * lower-case, replace every character that is not a letter, combining mark,
 * digit or whitespace with a space, split on whitespace, then drop tokens of
 * two characters or fewer and stopwords.
 *
 * Letters are matched with Unicode property escapes rather than `a-z`, so
 * accented and non-Latin titles keep their words instead of being chopped up
 * ("café" → "caf") or reduced to an empty set, which would make the lexical
 * dedup guard reject them unconditionally. ASCII titles tokenise exactly as
 * before.
 *
 * @param title Raw memory title.
 * @returns The de-duplicated set of significant tokens (possibly empty).
 */
function significantTitleTokens(title: string): Set<string> {
  return new Set(
    title
      .normalize("NFC")
      .toLowerCase()
      .replace(/[^\p{L}\p{M}\p{N}\s]/gu, " ")
      .split(/\s+/)
      .filter((t) => t.length > 2 && !TITLE_STOPWORDS.has(t)),
  );
}
197
+
198
/**
 * Jaccard index |A ∩ B| / |A ∪ B| of two token sets.
 * Yields 0 whenever at least one of the sets is empty.
 */
function jaccard(a: Set<string>, b: Set<string>): number {
  if (a.size === 0 || b.size === 0) {
    return 0;
  }
  const shared = [...a].filter((token) => b.has(token)).length;
  const unionSize = a.size + b.size - shared;
  return shared / unionSize;
}
205
+
206
/** An existing memory that looks like a near-duplicate of a pending write. */
export interface SupersedeCandidate {
  id: string;
  title: string;
  /** Fused RRF score reported by the hybrid search; larger means more relevant. */
  score: number;
}

/**
 * Write-time dedup probe (card #275). Run BEFORE inserting a new memory to
 * look for existing entities of the SAME type (and scope, when given) that
 * appear to be near-duplicates of the candidate title + content.
 *
 * The lookup goes through `findSimilarEntities` — the existing hybrid-search
 * path — so no additional embedding pipeline is involved. A hit is reported
 * only when it clears BOTH the RRF floor (enforced via `minRrfScore`) AND the
 * lexical title-overlap guard. The result is an advisory "probably the same
 * memory" list, never a silent merge: the caller always inserts and decides
 * explicitly whether to supersede anything.
 *
 * Non-fatal by design: any error thrown while probing is swallowed and an
 * empty list is returned so the write can proceed.
 *
 * @param client      API client forwarded to `findSimilarEntities`.
 * @param title       Title of the memory about to be written.
 * @param content     Content of the memory about to be written.
 * @param type        Entity type; the search is restricted to this type.
 * @param workspaceId Workspace to search in.
 * @param options     Optional project/scope filters, hit limit (default 10)
 *                    and overrides for the two thresholds.
 * @returns Candidates in the order the search returned them.
 */
export async function findSupersedeCandidates(
  client: HarmonyApiClient,
  title: string,
  content: string,
  type: string,
  workspaceId: string,
  options?: {
    projectId?: string;
    scope?: string;
    limit?: number;
    rrfThreshold?: number;
    titleOverlap?: number;
  },
): Promise<SupersedeCandidate[]> {
  const minScore = options?.rrfThreshold ?? SUPERSEDE_RRF_THRESHOLD;
  const minOverlap = options?.titleOverlap ?? SUPERSEDE_TITLE_OVERLAP;
  const wantedScope = options?.scope;
  const newTitleTokens = significantTitleTokens(title);

  try {
    const hits = await findSimilarEntities(client, title, content, workspaceId, {
      projectId: options?.projectId,
      // Dedup only makes sense within one type, so filter server-side.
      type,
      limit: options?.limit ?? 10,
      minRrfScore: minScore,
    });

    const candidates: SupersedeCandidate[] = [];
    for (const hit of hits) {
      const extra = hit as { scope?: string; superseded_at?: string | null };

      // Same scope only — a project memory shouldn't supersede a global one.
      // Hits that carry no scope at all are not filtered on scope.
      if (
        wantedScope &&
        extra.scope !== undefined &&
        extra.scope !== wantedScope
      ) {
        continue;
      }

      // Drop rows already marked superseded, when the field is present.
      // NOTE: the hybrid-search RPC does not return `superseded_at`, so this
      // check only takes effect on the FTS-fallback path; on the embedding
      // path a retired row can still show up as a *candidate*. That is
      // non-destructive because the list is advisory. A complete fix needs the
      // RPC to return/filter the column (migration + deploy); tracked in
      // docs/memory.md.
      if (extra.superseded_at) {
        continue;
      }

      // Lexical guard: demand real title-token overlap in addition to RRF.
      const overlap = jaccard(newTitleTokens, significantTitleTokens(hit.title));
      if (!(overlap >= minOverlap)) {
        continue;
      }

      candidates.push({
        id: hit.id,
        title: hit.title,
        score: hit.rrf_score ?? 0,
      });
    }
    return candidates;
  } catch {
    // A failed dedup probe must never block the write.
    return [];
  }
}
293
+
141
294
  /**
142
295
  * Causal lookup table: maps an entity type to the target types it should
143
296
  * be linked to, along with the relation type and direction.
@@ -11,9 +11,18 @@
11
11
  * baseline so recency + importance still differentiate.
12
12
  * recency_decay — exp(-Δt_seconds / τ_type) clamped to [0, 1].
13
13
  * τ depends on memory type per plan §4.
14
- * importance_norm — importance / 10, clamped to [0, 1].
14
+ * importance_norm — effective_importance / 10, clamped to [0, 1], where
15
+ * effective_importance folds in two bounded, deterministic
16
+ * signals on top of the stored importance (card #279):
17
+ * + usage bump — proven-useful memories (recalled
18
+ * often) rank above never-recalled ones
19
+ * + feedback bump — 👍/👎 stored in metadata.feedback
20
+ * See `effectiveImportance` below. This is RANKING ONLY:
21
+ * nothing is stored, deleted, or mutated.
15
22
  *
16
- * defaults: α=0.55, β=0.25, γ=0.20 (sum to 1.0).
23
+ * defaults: α=0.55, β=0.25, γ=0.20 (sum to 1.0). Weights are NOT re-tuned by
24
+ * #279 — usage + feedback fold into the existing γ·importance term so the
25
+ * formula stays a stable 3-weight model.
17
26
  *
18
27
  * The function is pure. Hot-path-safe — no LLM calls, no DB reads.
19
28
  */
@@ -28,6 +37,37 @@ export const DEFAULT_WEIGHTS = {
28
37
  importance: 0.2,
29
38
  } as const;
30
39
 
40
+ // ---------------------------------------------------------------------------
41
+ // Usage + feedback bumps (card #279) — fold into effective importance.
42
+ // ---------------------------------------------------------------------------
43
+
44
/**
 * Usage bump parameters (card #279, task 2).
 *
 * Memories that have actually been recalled receive a bounded, log-scaled
 * lift to their importance, so proven-useful memories rank above
 * never-recalled ones when relevance and recency are equal. A memory that was
 * never recalled gets +0 and therefore sinks gently relative to used peers.
 *
 *   bump = USAGE_BUMP_SCALE · ln(1 + access_count), capped at USAGE_BUMP_MAX
 *
 * The logarithm makes the lift diminishing: going from 0 to 1 recall matters
 * most and very large counts cannot dominate. The cap keeps usage from
 * swamping the stored importance. Pure and deterministic — no LLM, no storage
 * change.
 */
export const USAGE_BUMP_SCALE = 0.6;
export const USAGE_BUMP_MAX = 2;

/**
 * Feedback bump parameters (card #279, task 3).
 *
 * The net of the 👍/👎 counters kept non-destructively in
 * `metadata.feedback = { up, down }` moves importance up (positive net) or
 * down (negative net). The shift is bounded and symmetric, and it influences
 * RANKING ONLY — feedback never deletes or supersedes a memory.
 *
 *   bump = FEEDBACK_BUMP_SCALE · sign(net) · ln(1 + |net|),
 *          clamped to ±FEEDBACK_BUMP_MAX
 */
export const FEEDBACK_BUMP_SCALE = 0.8;
export const FEEDBACK_BUMP_MAX = 2;
70
+
31
71
  // Per-type recency time constant τ in seconds.
32
72
  // `Infinity` = never decays (preferences shouldn't fade with disuse).
33
73
  export const TYPE_TAU_SECONDS: Record<string, number> = {
@@ -71,17 +111,28 @@ export const TYPE_IMPORTANCE_DEFAULT: Record<string, number> = {
71
111
  // Types
72
112
  // ---------------------------------------------------------------------------
73
113
 
114
/** Thumbs-up / thumbs-down tallies kept non-destructively at `metadata.feedback`. */
export interface MemoryFeedback {
  up?: number;
  down?: number;
}

/** The fields of a memory entity that the Park scoring reads. */
export interface ParkInput {
  type: string;
  importance?: number | null;
  last_accessed_at?: string | null;
  created_at?: string | null;
  /** Recall counter maintained by batch_touch_knowledge_entities (#273). */
  access_count?: number | null;
  /** Holds `metadata.feedback` (#279); any other metadata key is ignored. */
  metadata?: { feedback?: MemoryFeedback | null } | null;
}

/** An entity together with its individual score terms and the combined score. */
export interface ParkScored<T extends ParkInput> {
  entity: T;
  relevance: number;
  recency: number;
  /** Effective importance term in [0,1], i.e. after the usage + feedback bumps. */
  importance: number;
  score: number;
}
@@ -124,10 +175,59 @@ function recencyDecay(
124
175
  return clamp01(Math.exp(-dtSec / tau));
125
176
  }
126
177
 
127
- function importanceNorm(raw: number | null | undefined, type: string): number {
178
/**
 * Stored (base) importance of an entity, limited to the range [1,10].
 *
 * A numeric `raw` value is used as-is; otherwise the per-type default from
 * TYPE_IMPORTANCE_DEFAULT applies, falling back to 5 for unlisted types.
 */
function baseImportance(raw: number | null | undefined, type: string): number {
  const stored =
    typeof raw === "number" ? raw : (TYPE_IMPORTANCE_DEFAULT[type] ?? 5);
  if (stored < 1) return 1;
  if (stored > 10) return 10;
  return stored;
}
185
+
186
/**
 * Usage lift for a memory (card #279, task 2): log-scaled in the recall count
 * and capped at USAGE_BUMP_MAX. Missing, non-numeric, zero or negative counts
 * yield +0. Pure; the only input is `access_count`.
 */
export function usageBump(accessCount: number | null | undefined): number {
  if (typeof accessCount !== "number" || !(accessCount > 0)) {
    return 0;
  }
  const lift = USAGE_BUMP_SCALE * Math.log(1 + accessCount);
  return Math.min(USAGE_BUMP_MAX, lift);
}
196
+
197
/**
 * Bounded, symmetric feedback shift (card #279, task 3). A positive net of
 * 👍 over 👎 lifts importance, a negative net demotes it. Pure; the only input
 * is `metadata.feedback`.
 *
 *   bump = FEEDBACK_BUMP_SCALE · sign(net) · ln(1 + |net|),
 *          clamped to ±FEEDBACK_BUMP_MAX
 *
 * Each counter is sanitised before use: anything that is not a finite,
 * positive number counts as 0. This matches how `mergeFeedback` reads stored
 * counters, and it stops a corrupt value (NaN, ±Infinity, a negative tally)
 * from producing a NaN bump — which would otherwise propagate into the final
 * score — or from inverting the direction of the shift.
 *
 * @param feedback Stored counters, or null/undefined when there are none.
 * @returns A value in [-FEEDBACK_BUMP_MAX, FEEDBACK_BUMP_MAX]; 0 when the
 *          sanitised net is zero.
 */
export function feedbackBump(
  feedback: MemoryFeedback | null | undefined,
): number {
  const tally = (value: unknown): number =>
    typeof value === "number" && Number.isFinite(value) && value > 0
      ? value
      : 0;

  const net = tally(feedback?.up) - tally(feedback?.down);
  if (net === 0) return 0;

  const raw =
    FEEDBACK_BUMP_SCALE * Math.sign(net) * Math.log(1 + Math.abs(net));
  return Math.max(-FEEDBACK_BUMP_MAX, Math.min(FEEDBACK_BUMP_MAX, raw));
}
214
+
215
/**
 * Effective importance of an entity, normalised to [0,1] (card #279, task 2 + 3).
 *
 * Computed as base importance plus the usage bump plus the feedback bump; the
 * sum is limited to [1,10] and then divided by 10.
 *
 * Usage and feedback are folded into the existing γ·importance term rather
 * than given weights of their own, which keeps the Park formula a stable
 * 3-weight model. RANKING ONLY: the function reads `access_count` and
 * `metadata.feedback` and never writes, deletes, or supersedes anything.
 */
export function effectiveImportance(entity: ParkInput): number {
  const stored = baseImportance(entity.importance, entity.type);
  const usage = usageBump(entity.access_count);
  const feedback = feedbackBump(entity.metadata?.feedback);

  // Bumps are summed first and then added to the base value.
  const adjusted = stored + (usage + feedback);

  const bounded = adjusted < 1 ? 1 : adjusted > 10 ? 10 : adjusted;
  return bounded / 10;
}
133
233
 
@@ -160,7 +260,7 @@ export function rescore<T extends ParkInput & { id?: string }>(
160
260
  entity.type,
161
261
  now,
162
262
  );
163
- const importance = importanceNorm(entity.importance, entity.type);
263
+ const importance = effectiveImportance(entity);
164
264
  const score =
165
265
  w.relevance * relevance + w.recency * recency + w.importance * importance;
166
266
  return { entity, relevance, recency, importance, score };
@@ -176,6 +276,28 @@ export function rescore<T extends ParkInput & { id?: string }>(
176
276
  return scored;
177
277
  }
178
278
 
279
+ // ---------------------------------------------------------------------------
280
+ // minConfidence filter (#273)
281
+ // ---------------------------------------------------------------------------
282
+
283
/**
 * Retain the entities whose confidence is at least `minConfidence`.
 *
 * When no numeric threshold is supplied the input array itself is returned
 * unchanged. Otherwise entities whose confidence is not a number are removed
 * as well, since they cannot be shown to clear the bar.
 *
 * Pure and exported so the `minConfidence` semantics of the recall path can
 * be unit-tested. Once writes assign non-uniform confidence (#273), even a low
 * threshold produces a strictly smaller set than passing none.
 */
export function filterByMinConfidence<T extends { confidence?: number | null }>(
  entities: T[],
  minConfidence: number | undefined,
): T[] {
  if (typeof minConfidence !== "number") {
    return entities;
  }
  return entities.reduce<T[]>((kept, entity) => {
    const confidence = entity.confidence;
    if (typeof confidence === "number" && confidence >= minConfidence) {
      kept.push(entity);
    }
    return kept;
  }, []);
}
300
+
179
301
  // ---------------------------------------------------------------------------
180
302
  // Rank-to-relevance helper (Phase 1 hybrid retrieval bridge)
181
303
  // ---------------------------------------------------------------------------
@@ -250,3 +372,60 @@ export function fitToBudget<
250
372
  }
251
373
  return out;
252
374
  }
375
+
376
+ // ---------------------------------------------------------------------------
377
+ // Stale / never-recalled signal (card #279, task 4)
378
+ // ---------------------------------------------------------------------------
379
+
380
/** The entity fields inspected by `isStaleUnused`. */
export interface StaleUnusedInput {
  access_count?: number | null;
  last_accessed_at?: string | null;
  created_at?: string | null;
}

/**
 * Tells whether a memory has NEVER been recalled (`access_count` is 0 or
 * absent) AND is older than `thresholdDays`.
 *
 * SIGNAL ONLY — the prune-suggestion digest of card #280 uses it to surface
 * candidates for human review. Nothing is deleted or modified here.
 *
 * Age is taken from `created_at`: a never-recalled memory has no meaningful
 * `last_accessed_at`, because #273 stamps that field only on recall. A missing
 * or unparseable `created_at` means the age is unknown and the memory is not
 * flagged. A memory recalled even once is never stale-unused, however old.
 *
 * @param entity        Memory fields to inspect.
 * @param now           Reference time used to compute the age.
 * @param thresholdDays Age in days that must be strictly exceeded.
 */
export function isStaleUnused(
  entity: StaleUnusedInput,
  now: Date,
  thresholdDays: number,
): boolean {
  const { access_count: recalls, created_at: createdAt } = entity;

  if (typeof recalls === "number" && recalls > 0) {
    return false;
  }
  if (!createdAt) {
    return false; // Unknown age: don't flag.
  }

  const createdMs = Date.parse(createdAt);
  if (Number.isNaN(createdMs)) {
    return false;
  }

  const msPerDay = 1000 * 60 * 60 * 24;
  const elapsedDays = (now.getTime() - createdMs) / msPerDay;
  return elapsedDays > thresholdDays;
}
411
+
412
+ // ---------------------------------------------------------------------------
413
+ // Feedback merge (card #279, task 3) — non-destructive counter increment
414
+ // ---------------------------------------------------------------------------
415
+
416
/**
 * Apply one 👍/👎 vote to a feedback counter and return the result as a NEW
 * object; the input is left untouched. The recall-feedback record path uses
 * this to build the `metadata.feedback` patch before persisting.
 *
 * Stored counters that are missing, non-numeric, zero or negative are read as
 * 0, so the returned tallies are never negative.
 */
export function mergeFeedback(
  existing: MemoryFeedback | null | undefined,
  vote: "up" | "down",
): MemoryFeedback {
  const read = (stored: number | undefined): number =>
    typeof stored === "number" && stored > 0 ? stored : 0;

  const tally = { up: read(existing?.up), down: read(existing?.down) };
  tally[vote] += 1;
  return tally;
}
@@ -0,0 +1,177 @@
1
+ /**
2
+ * Write-time provenance + signal-derived confidence/importance.
3
+ *
4
+ * Card #273: "Set meaningful confidence, importance & provenance at write
5
+ * time". Three deterministic, rule-based concerns live here (NO LLM rating —
6
+ * LLM-rated importance was reverted in 43782a4 for poisoning the corpus and
7
+ * stays deferred):
8
+ *
9
+ * 1. `MemoryOrigin` — provenance stamped into `metadata.origin`. There is no
10
+ * dedicated provenance column; it rides inside the existing metadata jsonb.
11
+ * 2. `defaultConfidenceForSource` — source/trust-derived confidence default
12
+ * used only when the caller did NOT pass an explicit confidence.
13
+ * 3. `importanceWithSignalBump` — per-type importance default plus a bounded
14
+ * bump for signal density (file paths, "Why"/"How to apply" sections,
15
+ * proper nouns). Pairs with the Park rescore so ranking differentiates.
16
+ *
17
+ * Pure and side-effect free — safe to unit test without network/DB.
18
+ */
19
+
20
+ // ---------------------------------------------------------------------------
21
+ // Provenance shape (task 1)
22
+ // ---------------------------------------------------------------------------
23
+
24
/** The channel through which a memory was written. */
export type MemorySource = "manual" | "assistant" | "agent-run" | "import";

/**
 * Provenance record persisted at `metadata.origin`. It feeds provenance badges
 * (companion UI card) and auto-vs-curated hygiene filtering. Only `source` is
 * mandatory, so partial context still round-trips.
 */
export interface MemoryOrigin {
  source: MemorySource;
  /** Card the write came from (uuid or short id, as a string). */
  source_card_id?: string;
  /** Agent / working-memory session that produced the write. */
  source_session_id?: string;
  /** Identifier of the human or agent that authored the memory. */
  author?: string;
  /**
   * Trust hint passed along from the write call (e.g. 'document', 'manual',
   * 'agent'). It is persisted so downstream hygiene can reason about it —
   * previously `source_trust` only reached the Floor and was dropped.
   */
  source_trust?: string;
}

/**
 * Assemble a `MemoryOrigin` from loosely-typed input.
 *
 * `source` is always copied. Every other field is included only when it is
 * truthy, so null, undefined and empty strings are left out and the stored
 * jsonb stays compact.
 */
export function buildOrigin(input: {
  source: MemorySource;
  source_card_id?: string | null;
  source_session_id?: string | null;
  author?: string | null;
  source_trust?: string | null;
}): MemoryOrigin {
  const { source, source_card_id, source_session_id, author, source_trust } =
    input;
  return {
    source,
    ...(source_card_id ? { source_card_id } : {}),
    ...(source_session_id ? { source_session_id } : {}),
    ...(author ? { author } : {}),
    ...(source_trust ? { source_trust } : {}),
  };
}
67
+
68
+ // ---------------------------------------------------------------------------
69
+ // Source-derived confidence default (task 5)
70
+ // ---------------------------------------------------------------------------
71
+
72
/**
 * Confidence default derived from source and trust. It is meant to be used
 * ONLY when the caller supplied no explicit confidence; an explicit value
 * always wins, which is the caller's responsibility.
 *
 * Deterministic rule, checked in this order:
 *   - `source_trust` is 'document' (curated long-form) or 'manual': 0.9
 *   - source 'manual' (human at the keyboard):                      0.9
 *   - source 'assistant' (board assistant acting for the user):     0.8
 *   - source 'agent-run' (auto-extract) or 'import' (low trust):    0.6
 *   - any other value:                                              0.7
 *
 * The goal is non-uniformity: most live entities sat at 1.0, which left
 * `minConfidence` filtering inert. Auto-written memories start lower so a
 * curated pattern outranks a freshly auto-extracted episode at equal
 * relevance/recency.
 */
export function defaultConfidenceForSource(input: {
  source: MemorySource;
  source_trust?: string;
}): number {
  const { source, source_trust: trust } = input;

  // A trust hint of 'document' or 'manual' overrides the source.
  if (trust === "document" || trust === "manual") return 0.9;

  if (source === "manual") return 0.9;
  if (source === "assistant") return 0.8;
  if (source === "agent-run" || source === "import") return 0.6;
  return 0.7;
}
106
+
107
+ // ---------------------------------------------------------------------------
108
+ // Importance signal-density bump (task 6)
109
+ // ---------------------------------------------------------------------------
110
+
111
/**
 * Default importance for each memory type, on the [1,10] scale.
 *
 * Kept in step with the edge function `IMPORTANCE_DEFAULTS` and with
 * `TYPE_IMPORTANCE_DEFAULT` in `memory-park.ts`. The table is repeated here so
 * that the write path does not need to import the rescorer.
 */
export const TYPE_IMPORTANCE_DEFAULT: Record<string, number> = {
  preference: 9,
  lesson: 8,
  decision: 8,
  pattern: 7,
  solution: 7,
  procedure: 7,
  error: 5,
  context: 5,
  task: 5,
  agent: 5,
  relationship: 6,
  commitment: 7,
  project: 6,
  handoff: 6,
};
132
+
133
// A file path / module path, e.g. `src/foo/bar.ts`, `packages/x/y`.
const FILE_PATH =
  /(?:[\w.-]+\/){1,}[\w.-]+\.[a-z]{1,4}\b|(?:[\w-]+\/){2,}[\w-]+/i;
// A "Why" or "How to apply" style guidance heading.
const GUIDANCE_SECTION =
  /\b(why|how to apply|how to use|takeaway|root cause)\b/i;
// A proper noun / identifier in the body. Alternatives, in order:
//   1. a backtick-fenced symbol
//   2. PascalCase  (e.g. BoardContext)
//   3. camelCase   (e.g. useState) — previously promised by this comment but
//      not matched, because the only mixed-case alternative required a leading
//      capital letter
//   4. snake_case  (e.g. access_count)
const PROPER_NOUN =
  /`[^`]+`|\b[A-Z][a-z0-9]+[A-Z][A-Za-z0-9]*\b|\b[a-z][a-z0-9]*[A-Z][A-Za-z0-9]*\b|\b[a-z]+_[a-z][\w]*\b/;

/**
 * Count the distinct signal-density cues present in the content. Each cue
 * contributes at most once, so the result is in [0,3].
 *   +1 — contains a file/module path
 *   +1 — contains a "Why"/"How to apply"/"How to use"/"Takeaway"/"Root cause"
 *        phrase
 *   +1 — contains a proper noun / code identifier (backticked, PascalCase,
 *        camelCase or snake_case)
 *
 * @param content Memory body to inspect.
 * @returns Number of cue kinds found.
 */
export function signalDensity(content: string): number {
  let n = 0;
  if (FILE_PATH.test(content)) n += 1;
  if (GUIDANCE_SECTION.test(content)) n += 1;
  if (PROPER_NOUN.test(content)) n += 1;
  return n;
}
157
+
158
/**
 * Rule-based importance for a new memory: the per-type default (5 for
 * unlisted types) plus a signal-density bump, limited to [1,10].
 *
 * Each cue reported by `signalDensity` adds +1, capped at +2 overall, so a
 * dense, file-path-bearing lesson edges out a thin one of the same type and
 * the Park rescore gets a non-flat importance term. An importance supplied by
 * the caller always takes precedence and never reaches this function.
 */
export function importanceWithSignalBump(
  type: string,
  content: string,
): number {
  const typeDefault = TYPE_IMPORTANCE_DEFAULT[type] ?? 5;
  const cues = signalDensity(content);
  const total = typeDefault + (cues > 2 ? 2 : cues);
  return total < 1 ? 1 : total > 10 ? 10 : total;
}