@gethmy/mcp 2.4.7 → 2.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,252 @@
1
+ /**
2
+ * Park-style memory rescoring (recency × importance × relevance).
3
+ *
4
+ * Implements §6 of docs/superpowers/plans/2026-05-07-memory-architecture-v2.md.
5
+ *
6
+ * final_score = α · relevance + β · recency_decay + γ · importance_norm
7
+ *
8
+ * relevance — caller-provided. Cosine, RRF score, or a heuristic
9
+ * relevance proxy in [0, 1]. When the caller has no
10
+ * ranking signal at all, supply 0.5 as a neutral
11
+ * baseline so recency + importance still differentiate.
12
+ * recency_decay — exp(-Δt_seconds / τ_type) clamped to [0, 1].
13
+ * τ depends on memory type per plan §4.
14
+ * importance_norm — importance clamped to [1, 10], then / 10 (so [0.1, 1]).
15
+ *
16
+ * defaults: α=0.55, β=0.25, γ=0.20 (sum to 1.0).
17
+ *
18
+ * The function is pure. Hot-path-safe — no LLM calls, no DB reads.
19
+ */
20
+
21
+ // ---------------------------------------------------------------------------
22
+ // Tunables (§4 + §6)
23
+ // ---------------------------------------------------------------------------
24
+
25
/**
 * Default blend weights (α, β, γ) applied to relevance, recency and
 * importance when the caller supplies no override.
 */
export const DEFAULT_WEIGHTS = {
  relevance: 0.55, // α
  recency: 0.25, // β
  importance: 0.2, // γ
} as const;
30
+
31
// Every finite τ below is a whole number of days.
const SECONDS_PER_DAY = 24 * 60 * 60;

/**
 * Recency time constant τ, in seconds, keyed by memory type.
 * `Infinity` = never decays (preferences shouldn't fade with disuse).
 */
export const TYPE_TAU_SECONDS: Record<string, number> = {
  preference: Number.POSITIVE_INFINITY,
  pattern: 180 * SECONDS_PER_DAY,
  reference: 180 * SECONDS_PER_DAY, // alias
  procedure: 180 * SECONDS_PER_DAY,
  decision: 90 * SECONDS_PER_DAY,
  solution: 90 * SECONDS_PER_DAY,
  lesson: 90 * SECONDS_PER_DAY,
  context: 30 * SECONDS_PER_DAY,
  error: 30 * SECONDS_PER_DAY,
  task: 30 * SECONDS_PER_DAY,
  agent: 30 * SECONDS_PER_DAY,
  handoff: 30 * SECONDS_PER_DAY,
  relationship: 90 * SECONDS_PER_DAY,
  commitment: 90 * SECONDS_PER_DAY,
  project: 180 * SECONDS_PER_DAY,
};
50
+
51
/**
 * Default importance (on the 1–10 scale) per memory type, used when an
 * entity carries no importance of its own.
 *
 * Mirrors the edge function's IMPORTANCE_DEFAULTS; the table is duplicated
 * here so the rescorer doesn't depend on the edge function.
 *
 * NOTE(review): `reference` has a τ entry in TYPE_TAU_SECONDS but no entry
 * here — confirm whether that omission is intentional.
 */
export const TYPE_IMPORTANCE_DEFAULT: Record<string, number> = {
  preference: 9,
  lesson: 8,
  decision: 8,
  pattern: 7,
  solution: 7,
  procedure: 7,
  error: 5,
  context: 5,
  task: 5,
  agent: 5,
  relationship: 6,
  commitment: 7,
  project: 6,
  handoff: 6,
};
69
+
70
+ // ---------------------------------------------------------------------------
71
+ // Types
72
+ // ---------------------------------------------------------------------------
73
+
74
/** Minimal entity shape the rescorer needs. */
export interface ParkInput {
  /** Memory type; selects the recency time constant and the default importance. */
  type: string;
  /** Raw importance on the 1–10 scale; a per-type default is used when absent. */
  importance?: number | null;
  /** Timestamp string (parsed with `Date.parse`) preferred for recency. */
  last_accessed_at?: string | null;
  /** Timestamp string used for recency when no access time exists, and to break score ties. */
  created_at?: string | null;
}
80
+
81
/** An entity together with its three score components and the blended total. */
export interface ParkScored<T extends ParkInput> {
  /** The entity that was scored, passed through untouched. */
  entity: T;
  /** Relevance component after clamping to [0, 1]. */
  relevance: number;
  /** Recency-decay component. */
  recency: number;
  /** Normalised importance component. */
  importance: number;
  /** Weighted sum of the three components. */
  score: number;
}
88
+
89
/** Optional knobs accepted by the rescorer. */
export interface ParkOptions {
  /**
   * Per-component weight overrides; any key left out keeps its default.
   *
   * Typed as plain numbers keyed by the default weight names.
   * `DEFAULT_WEIGHTS` is declared `as const`, so the previous
   * `Partial<typeof DEFAULT_WEIGHTS>` narrowed every weight to its literal
   * default value and rejected any real override at compile time.
   */
  weights?: Partial<Record<keyof typeof DEFAULT_WEIGHTS, number>>;
  /** Reference "current time" for recency; inject for testing. */
  now?: Date;
  /**
   * Caller-supplied relevance per entity. Map by entity id. Anything missing
   * defaults to 0.5 (neutral). When the retrieval path has no ranking signal
   * (e.g. plain filter list), pass an empty Map.
   */
  relevance?: Map<string, number>;
}
99
+
100
+ // ---------------------------------------------------------------------------
101
+ // Math helpers
102
+ // ---------------------------------------------------------------------------
103
+
104
/** Clamp `x` into [0, 1]; NaN collapses to 0. */
function clamp01(x: number): number {
  return Number.isNaN(x) || x < 0 ? 0 : x > 1 ? 1 : x;
}
110
+
111
/** Parse a timestamp string to epoch milliseconds; undefined when absent or unparseable. */
function parseTimestampMs(ts: string | null | undefined): number | undefined {
  if (!ts) return undefined;
  const ms = Date.parse(ts);
  return Number.isNaN(ms) ? undefined : ms;
}

/**
 * Recency factor exp(-Δt / τ) in [0, 1] for one entity.
 *
 * @param lastAccessedAt Preferred timestamp; used whenever it parses.
 * @param createdAt Fallback timestamp when the access time is missing,
 *   empty or unparseable.
 * @param type Memory type; selects τ from TYPE_TAU_SECONDS (unknown types
 *   use the `context` τ).
 * @param now Reference time.
 * @returns 1 when τ is not finite (no decay), 0.5 when neither timestamp is
 *   usable (neutral), otherwise the clamped exponential decay. Timestamps in
 *   the future are treated as Δt = 0.
 */
function recencyDecay(
  lastAccessedAt: string | null | undefined,
  createdAt: string | null | undefined,
  type: string,
  now: Date,
): number {
  // Own-property lookup only: a bare bracket read resolves inherited keys
  // such as "constructor" or "toString" to functions, which are non-finite
  // and would wrongly be treated as "never decays".
  const ownTau = Object.prototype.hasOwnProperty.call(TYPE_TAU_SECONDS, type)
    ? TYPE_TAU_SECONDS[type]
    : undefined;
  const tau: number | undefined = ownTau ?? TYPE_TAU_SECONDS.context;
  if (tau === undefined || !Number.isFinite(tau)) return 1; // Preferences never decay.

  // Fall back to the creation time when the access time is unusable rather
  // than giving up on a perfectly good created_at.
  const t = parseTimestampMs(lastAccessedAt) ?? parseTimestampMs(createdAt);
  if (t === undefined) return 0.5; // Unknown timestamp: neutral.

  const dtSec = Math.max(0, (now.getTime() - t) / 1000);
  return clamp01(Math.exp(-dtSec / tau));
}
126
+
127
/**
 * Normalise an importance value to (0, 1].
 *
 * @param raw Importance on the 1–10 scale. When it is not a number, or is
 *   NaN, the per-type default from TYPE_IMPORTANCE_DEFAULT is used (5 for
 *   types without an entry).
 * @param type Memory type used to pick the default.
 * @returns The value clamped to [1, 10] and divided by 10, i.e. in [0.1, 1].
 */
function importanceNorm(raw: number | null | undefined, type: string): number {
  // Own-property lookup only, so inherited keys such as "constructor" do not
  // resolve to a function and poison the arithmetic below.
  const typeDefault = Object.prototype.hasOwnProperty.call(TYPE_IMPORTANCE_DEFAULT, type)
    ? TYPE_IMPORTANCE_DEFAULT[type]
    : undefined;
  // NaN passes a bare `typeof === "number"` test and then slips through both
  // clamps (every comparison with NaN is false), so treat it as missing.
  const value =
    typeof raw === "number" && !Number.isNaN(raw) ? raw : (typeDefault ?? 5);
  return Math.min(10, Math.max(1, value)) / 10;
}
133
+
134
+ // ---------------------------------------------------------------------------
135
+ // Public API
136
+ // ---------------------------------------------------------------------------
137
+
138
/** Creation time in epoch ms for tie-breaking; missing or unparseable values count as 0. */
function createdAtMs(entity: ParkInput): number {
  return (entity.created_at ? Date.parse(entity.created_at) : 0) || 0;
}

/**
 * Comparator: higher score first, NaN scores last, ties broken by
 * created_at descending (newer first).
 */
function compareScored<T extends ParkInput>(
  a: ParkScored<T>,
  b: ParkScored<T>,
): number {
  const diff = b.score - a.score;
  if (Number.isNaN(diff)) {
    // A NaN comparator result makes the ordering inconsistent; push NaN
    // scores to the end instead.
    const aBad = Number.isNaN(a.score);
    const bBad = Number.isNaN(b.score);
    if (aBad !== bBad) return aBad ? 1 : -1;
  } else if (diff !== 0) {
    return diff;
  }
  return createdAtMs(b.entity) - createdAtMs(a.entity);
}

/**
 * Score a list of entities with the Park-style weighted sum
 *
 *   score = w.relevance · relevance + w.recency · recency + w.importance · importance
 *
 * and return them decorated with the four numbers, sorted by `score`
 * descending. Ties are broken by created_at descending (newer first).
 * The input array is not mutated.
 *
 * @param entities Entities to score.
 * @param options Weight overrides, injected clock and per-id relevance map.
 *   A weight that is omitted or explicitly `undefined` keeps its default.
 *   Entities with no relevance entry get 0.5.
 * @returns A new array of scored entities in ranked order.
 */
export function rescore<T extends ParkInput & { id?: string }>(
  entities: T[],
  options: ParkOptions = {},
): ParkScored<T>[] {
  // Resolve each weight with `??` rather than an object spread: spreading
  // `{ relevance: undefined }` would overwrite the default and turn every
  // score into NaN.
  const wRelevance: number = options.weights?.relevance ?? DEFAULT_WEIGHTS.relevance;
  const wRecency: number = options.weights?.recency ?? DEFAULT_WEIGHTS.recency;
  const wImportance: number = options.weights?.importance ?? DEFAULT_WEIGHTS.importance;
  const now = options.now ?? new Date();
  const relevanceMap: Map<string, number> =
    options.relevance ?? new Map<string, number>();

  const scored: ParkScored<T>[] = entities.map((entity) => {
    const relevance = clamp01(relevanceMap.get(entity.id ?? "") ?? 0.5);
    const recency = recencyDecay(
      entity.last_accessed_at,
      entity.created_at,
      entity.type,
      now,
    );
    const importance = importanceNorm(entity.importance, entity.type);
    const score =
      wRelevance * relevance + wRecency * recency + wImportance * importance;
    return { entity, relevance, recency, importance, score };
  });

  scored.sort(compareScored);
  return scored;
}
178
+
179
+ // ---------------------------------------------------------------------------
180
+ // Rank-to-relevance helper (Phase 1 hybrid retrieval bridge)
181
+ // ---------------------------------------------------------------------------
182
+
183
/**
 * Convert an ordered list of candidates from a hybrid retriever (vector +
 * lexical + RRF fusion) into a Map<id, relevance> that the rescorer can
 * consume. Uses an exponential falloff so the top result dominates while
 * deeper candidates still register a non-zero signal.
 *
 *   relevance(rank) = exp(-rank / decay)
 *
 * With `decay=10`, rank 0 is 1.00, rank 10 is 0.37, rank 30 is 0.05.
 *
 * @param ranked Candidates, best first. When an id appears more than once,
 *   its first (best) rank wins; later duplicates are ignored.
 * @param decay Falloff constant. Must be positive; zero, negative or NaN
 *   values fall back to the default of 10, because they would otherwise
 *   collapse the map to all-0 or all-1 and erase the ranking signal.
 * @returns Map from entity id to relevance in [0, 1].
 */
export function relevanceFromRank<T extends { id: string }>(
  ranked: T[],
  decay = 10,
): Map<string, number> {
  const tau = decay > 0 ? decay : 10;
  const out = new Map<string, number>();
  ranked.forEach((entity, rank) => {
    // Keep the best rank for a repeated id instead of overwriting it with a
    // worse one further down the list.
    if (out.has(entity.id)) return;
    // rank >= 0 and tau > 0, so the exponential is already within [0, 1].
    out.set(entity.id, Math.exp(-rank / tau));
  });
  return out;
}
209
+
210
+ // ---------------------------------------------------------------------------
211
+ // Token budget — greedy fill (§6.2 step 5)
212
+ // ---------------------------------------------------------------------------
213
+
214
/** Text fields that count toward an entity's token cost. */
interface TokenBudgetable {
  /** Entity title; null or missing counts as empty. */
  title?: string | null;
  /** Entity body; null or missing counts as empty. */
  content?: string | null;
}
218
+
219
// Rough characters-per-token ratio used for budgeting.
const APPROX_CHARS_PER_TOKEN = 4;
// Maximum number of content characters that count toward an entity's cost.
const CONTENT_PREVIEW_CHARS = 200;

/** Estimate the token count of `s` from its length, rounding up. */
function approxTokens(s: string): number {
  const estimate = s.length / APPROX_CHARS_PER_TOKEN;
  return Math.ceil(estimate);
}
225
+
226
/**
 * Take the longest prefix of an already-sorted scored list that fits a
 * token budget.
 *
 * An entity's cost is the estimated tokens of its title (first 300 chars)
 * plus its content preview (first CONTENT_PREVIEW_CHARS chars) plus 4 tokens
 * of marker overhead. Scanning stops at the first entity that does not fit;
 * the top entity is always returned, even when it alone exceeds the budget.
 * A budget of zero or less yields an empty list.
 */
export function fitToBudget<
  T extends ParkInput & TokenBudgetable & { id?: string },
>(scored: ParkScored<T>[], budgetTokens: number): ParkScored<T>[] {
  const kept: ParkScored<T>[] = [];
  if (budgetTokens <= 0) return kept;

  let spent = 0;
  for (const candidate of scored) {
    const { title, content } = candidate.entity;
    const titleTokens = approxTokens((title ?? "").slice(0, 300));
    const contentTokens = approxTokens(
      (content ?? "").slice(0, CONTENT_PREVIEW_CHARS),
    );
    const cost = titleTokens + contentTokens + 4; // marker overhead

    if (spent + cost > budgetTokens) {
      // Always include at least the top result, even if oversized.
      if (kept.length === 0) kept.push(candidate);
      break;
    }

    kept.push(candidate);
    spent += cost;
  }
  return kept;
}
@@ -0,0 +1,61 @@
1
+ /**
2
+ * Session-scoped working memory helpers (plan §12 Phase 1, §13.1 D3).
3
+ *
4
+ * Working memories live alongside long-term memories in `knowledge_entities`,
5
+ * distinguished only by their `scope` value. The scope literal `'session'` is
6
+ * an alias the caller passes to `harmony_remember`; this module resolves it to
7
+ * the concrete `session:<agent_session_id>` form against the active session.
8
+ *
9
+ * Composite tenancy `(workspace_id, agent_session_id)` is enforced by RLS at
10
+ * the workspace_id level (existing) plus the session id embedded in the scope
11
+ * string. Cross-workspace leakage is impossible because workspace membership
12
+ * gates the row in the first place.
13
+ */
14
+
15
// Prefix shared by every concrete session-scope value (`session:<id>`).
const SESSION_SCOPE_PREFIX = "session:";

/**
 * Turn the caller's `scope` argument into the value that gets stored.
 *
 * - The alias `'session'` becomes `session:<activeSessionId>`.
 * - Every other string, and `undefined`, is handed back untouched so
 *   existing scopes and the caller's own default keep working.
 *
 * @throws Error when `'session'` is requested but no session id is active.
 */
export function resolveSessionScope(
  requested: string | undefined,
  activeSessionId: string | undefined,
): string | undefined {
  if (requested === "session") {
    if (activeSessionId) {
      return SESSION_SCOPE_PREFIX.concat(activeSessionId);
    }
    const reason = "scope='session' requires an active agent session. ";
    const remedy =
      "Call harmony_start_agent_session before storing session memories.";
    throw new Error(reason + remedy);
  }
  return requested;
}
40
+
41
/**
 * Report whether `scope` is a concrete session scope (`session:<id>` with a
 * non-empty id). The bare alias `'session'`, the bare prefix, and
 * non-string values all yield `false`.
 */
export function isSessionScope(scope: string | undefined | null): boolean {
  return (
    typeof scope === "string" &&
    scope.startsWith(SESSION_SCOPE_PREFIX) &&
    scope.length > SESSION_SCOPE_PREFIX.length
  );
}
53
+
54
/**
 * Produce the concrete `session:<id>` scope for a session id the caller
 * already holds, bypassing alias resolution (e.g. retrieval prepending
 * working memories for the active session).
 */
export function sessionScopeFor(agentSessionId: string): string {
  return SESSION_SCOPE_PREFIX.concat(agentSessionId);
}
@@ -5,6 +5,23 @@
5
5
  * context extraction, and variant-specific instructions.
6
6
  */
7
7
 
8
+ import { createHash, randomUUID } from "node:crypto";
9
+
10
/**
 * Version of the prompt template's structural framing. Increment it whenever
 * sections are added, removed or reordered, so cohort analysis only compares
 * prompts built from the same structure.
 */
export const PROMPT_TEMPLATE_VERSION = 1;
16
+
17
/**
 * Hash a generated prompt into a stable key: the lowercase hex SHA-256
 * digest of its UTF-8 bytes. Used to group cohort siblings for
 * outcome-feedback analysis.
 */
export function computeContentHash(prompt: string): string {
  const hasher = createHash("sha256");
  hasher.update(prompt, "utf8");
  return hasher.digest("hex");
}
24
+
8
25
  // Types
9
26
/** The prompt variants this module distinguishes. */
export type PromptVariant =
  | "analysis"
  | "draft"
  | "execute";
10
27
  export type LabelCategory =
@@ -52,6 +69,12 @@ export interface GeneratedPrompt {
52
69
  };
53
70
  tokenEstimate: number;
54
71
  assemblyId?: string;
72
+ /** Local UUID identifying this generated snapshot. Persisted on prompt_history. */
73
+ promptId: string;
74
+ /** SHA-256 of the generated prompt body — cohort key for variant analysis. */
75
+ contentHash: string;
76
+ /** Template version that produced this prompt. */
77
+ version: number;
55
78
  }
56
79
 
57
80
  // Label name to category mapping
@@ -533,6 +556,9 @@ Keep \`currentTask\` specific (e.g., "Refactoring auth middleware" not "Working
533
556
  },
534
557
  tokenEstimate: estimateTokens(prompt),
535
558
  ...(assemblyId && { assemblyId }),
559
+ promptId: randomUUID(),
560
+ contentHash: computeContentHash(prompt),
561
+ version: PROMPT_TEMPLATE_VERSION,
536
562
  };
537
563
  }
538
564
 
@@ -694,3 +720,70 @@ export function getAvailableCategories(): LabelCategory[] {
694
720
/** List every prompt variant, in a fresh array on each call. */
export function getAvailableVariants(): PromptVariant[] {
  const variants: PromptVariant[] = ["analysis", "draft", "execute"];
  return variants;
}
723
+
724
+ // ─── Variant proposal (logged-only — no auto-commit) ──────────────────
725
+
726
/**
 * Outcome of one session that consumed a given prompt; the input row shape
 * for {@link proposePromptVariant}.
 */
export interface PromptCohortRow {
  /** Final agent session status — only "completed" is treated as success. */
  status: string | null;
  /** Final progress percent recorded on the linked session, or null when none was recorded. */
  progressPercent: number | null;
  /** True when the linked session ended with non-empty blockers. */
  hadBlockers: boolean;
}
735
+
736
/** A logged-only suggestion to try a different framing for a prompt cohort. */
export interface PromptVariantSuggestion {
  /** Hash identifying the cohort the suggestion is about. */
  contentHash: string;
  /** Number of sessions in the cohort. */
  cohortSize: number;
  /** Fraction of the cohort counted as successfully completed (0–1). */
  completionRate: number;
  /** Human-readable hint describing the alternative framing to try. */
  framingHint: string;
}
742
+
743
// Smallest cohort for which a suggestion is produced.
const VARIANT_MIN_COHORT = 10;
// Completion rate at or above which the current framing is left alone.
const VARIANT_COMPLETION_THRESHOLD = 0.4;

/**
 * Suggest a different framing for the prompts sharing `contentHash`, judged
 * by how their sessions turned out.
 *
 * A session counts as a success only when its status is "completed", its
 * progress is at least 100 (a missing progress value counts as 0) and it
 * ended without blockers. The hint depends on how many sessions had
 * blockers: at 40% or more it recommends a diagnostic framing, otherwise an
 * action-forcing one.
 *
 * Per the AGP-P2 locked decision, this is logged-only — callers may surface
 * the suggestion to humans, but no auto-commit of new templates is allowed.
 *
 * @param contentHash Cohort key; an empty hash short-circuits to null
 *   without calling the loader.
 * @param fetchCohort Async loader returning one row per session that
 *   consumed a prompt with this hash. Injected so this module stays
 *   decoupled from the API client.
 * @returns null when the cohort has fewer than VARIANT_MIN_COHORT rows or
 *   its completion rate reaches VARIANT_COMPLETION_THRESHOLD; otherwise the
 *   suggestion.
 */
export async function proposePromptVariant(
  contentHash: string,
  fetchCohort: (hash: string) => Promise<PromptCohortRow[]>,
): Promise<PromptVariantSuggestion | null> {
  if (!contentHash) return null;

  const rows = await fetchCohort(contentHash);
  if (!rows || rows.length < VARIANT_MIN_COHORT) return null;

  const cohortSize = rows.length;
  let succeeded = 0;
  let blocked = 0;
  for (const row of rows) {
    if (row.hadBlockers) {
      // A blocked session can never count as a success.
      blocked += 1;
      continue;
    }
    if (row.status === "completed" && (row.progressPercent ?? 0) >= 100) {
      succeeded += 1;
    }
  }

  const completionRate = succeeded / cohortSize;
  if (completionRate >= VARIANT_COMPLETION_THRESHOLD) return null;

  let framingHint: string;
  if (blocked / cohortSize >= 0.4) {
    framingHint =
      "Cohort hits frequent blockers — try a more diagnostic framing (require root-cause + repro before any fix).";
  } else {
    framingHint =
      "Cohort frequently stalls without finishing — try a more action-forcing framing (smaller subtasks, explicit DoD checklist).";
  }

  return { contentHash, cohortSize, completionRate, framingHint };
}