npm - @gethmy/mcp - Versions diffs - 2.4.7 → 2.5.1 - Mend

@gethmy/mcp 2.4.7 → 2.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (32) hide show

package/README.md +36 -20
package/dist/cli.js +1857 -30209
package/dist/index.js +1323 -26686
package/dist/lib/api-client.js +122 -925
package/package.json +4 -3
package/src/api-client.ts +129 -96
package/src/memory-floor.ts +264 -0
package/src/memory-park.ts +252 -0
package/src/memory-session.ts +61 -0
package/src/prompt-builder.ts +93 -0
package/src/server.ts +351 -1467
package/dist/http.js +0 -1959
package/dist/remote.js +0 -32328
package/dist/server.js +0 -31967
package/src/__tests__/active-learning.test.ts +0 -483
package/src/__tests__/agent-performance-profiles.test.ts +0 -468
package/src/__tests__/auto-session.test.ts +0 -912
package/src/__tests__/context-assembly.test.ts +0 -506
package/src/__tests__/graph-expansion.test.ts +0 -285
package/src/__tests__/integration-memory-crud.test.ts +0 -948
package/src/__tests__/integration-memory-system.test.ts +0 -321
package/src/__tests__/lifecycle-maintenance.test.ts +0 -238
package/src/__tests__/memory-audit.test.ts +0 -528
package/src/__tests__/pattern-detection.test.ts +0 -438
package/src/__tests__/prompt-builder.test.ts +0 -505
package/src/__tests__/remote-routing.test.ts +0 -285
package/src/active-learning.ts +0 -1165
package/src/consolidation.ts +0 -383
package/src/context-assembly.ts +0 -1175
package/src/lifecycle-maintenance.ts +0 -120
package/src/memory-audit.ts +0 -578
package/src/memory-cleanup.ts +0 -902

package/src/memory-park.ts ADDED Viewed

@@ -0,0 +1,252 @@
+/**
+ * Park-style memory rescoring (recency × importance × relevance).
+ *
+ * Implements §6 of docs/superpowers/plans/2026-05-07-memory-architecture-v2.md.
+ *
+ *   final_score = α · relevance + β · recency_decay + γ · importance_norm
+ *
+ *   relevance        — caller-provided. Cosine, RRF score, or a heuristic
+ *                      relevance proxy in [0, 1]. When the caller has no
+ *                      ranking signal at all, supply 0.5 as a neutral
+ *                      baseline so recency + importance still differentiate.
+ *   recency_decay    — exp(-Δt_seconds / τ_type) clamped to [0, 1].
+ *                      τ depends on memory type per plan §4.
+ *   importance_norm  — importance clamped to [1, 10], then divided by 10,
+ *                      giving a value in [0.1, 1].
+ *
+ *   defaults: α=0.55, β=0.25, γ=0.20 (sum to 1.0).
+ *
+ * The function is pure. Hot-path-safe — no LLM calls, no DB reads.
+ */
+// ---------------------------------------------------------------------------
+// Tunables (§4 + §6)
+// ---------------------------------------------------------------------------
+/**
+ * Default mixing weights for the three score components (they sum to 1.0).
+ * `rescore` merges any caller-supplied `options.weights` over these values.
+ */
+export const DEFAULT_WEIGHTS = {
+  relevance: 0.55,
+  recency: 0.25,
+  importance: 0.2,
+} as const;
+// Per-type recency time constant τ in seconds.
+// `Infinity` = never decays (preferences shouldn't fade with disuse).
+// Types missing from this table use the `context` value (see recencyDecay).
+export const TYPE_TAU_SECONDS: Record<string, number> = {
+  preference: Number.POSITIVE_INFINITY,
+  pattern: 60 * 60 * 24 * 180, // 180d
+  reference: 60 * 60 * 24 * 180, // alias (same τ as `pattern`)
+  procedure: 60 * 60 * 24 * 180,
+  decision: 60 * 60 * 24 * 90, // 90d
+  solution: 60 * 60 * 24 * 90,
+  lesson: 60 * 60 * 24 * 90,
+  context: 60 * 60 * 24 * 30, // 30d
+  error: 60 * 60 * 24 * 30,
+  task: 60 * 60 * 24 * 30,
+  agent: 60 * 60 * 24 * 30,
+  handoff: 60 * 60 * 24 * 30,
+  relationship: 60 * 60 * 24 * 90,
+  commitment: 60 * 60 * 24 * 90,
+  project: 60 * 60 * 24 * 180,
+};
+// Per-type importance default (mirrors edge function IMPORTANCE_DEFAULTS;
+// duplicated here so the rescorer doesn't depend on the edge function).
+// Values are on the same 1–10 scale as an entity's own `importance`. Types
+// missing from this table (e.g. `reference`, which only has a τ entry) fall
+// back to 5 in importanceNorm.
+export const TYPE_IMPORTANCE_DEFAULT: Record<string, number> = {
+  preference: 9,
+  lesson: 8,
+  decision: 8,
+  pattern: 7,
+  solution: 7,
+  procedure: 7,
+  error: 5,
+  context: 5,
+  task: 5,
+  agent: 5,
+  relationship: 6,
+  commitment: 7,
+  project: 6,
+  handoff: 6,
+};
+// ---------------------------------------------------------------------------
+// Types
+// ---------------------------------------------------------------------------
+/** Minimal entity shape the rescorer reads. */
+export interface ParkInput {
+  /** Memory type; selects the τ and default-importance table entries. */
+  type: string;
+  /** Raw importance, clamped to 1–10 by the scorer. Absent → per-type default. */
+  importance?: number | null;
+  /** Timestamp string (parsed with Date.parse); preferred over `created_at` for recency. */
+  last_accessed_at?: string | null;
+  /** Timestamp string; recency fallback and the tie-breaker when scores are equal. */
+  created_at?: string | null;
+}
+/** An entity together with its component values and the combined score. */
+export interface ParkScored<T extends ParkInput> {
+  /** The original entity, passed through untouched. */
+  entity: T;
+  /** Caller-supplied relevance after clamping to [0, 1] (0.5 when not supplied). */
+  relevance: number;
+  /** Recency decay value produced by recencyDecay. */
+  recency: number;
+  /** Normalised importance (raw value clamped to 1–10, divided by 10). */
+  importance: number;
+  /** Weighted sum of the three components above. */
+  score: number;
+}
+/** Optional knobs accepted by `rescore`. */
+export interface ParkOptions {
+  /**
+   * Per-component weight overrides, merged over DEFAULT_WEIGHTS.
+   *
+   * Typed as plain numbers on purpose: DEFAULT_WEIGHTS is declared `as const`,
+   * so `Partial<typeof DEFAULT_WEIGHTS>` would only accept the literal default
+   * values (0.55 / 0.25 / 0.2) and reject any real override at compile time.
+   */
+  weights?: Partial<Record<keyof typeof DEFAULT_WEIGHTS, number>>;
+  /** Reference "current time" for recency; defaults to `new Date()`. Inject for testing. */
+  now?: Date;
+  /**
+   * Caller-supplied relevance per entity. Map by entity id. Anything missing
+   * defaults to 0.5 (neutral). When the retrieval path has no ranking signal
+   * (e.g. plain filter list), pass an empty Map.
+   */
+  relevance?: Map<string, number>;
+}
+// ---------------------------------------------------------------------------
+// Math helpers
+// ---------------------------------------------------------------------------
+/** Clamp `x` into the closed interval [0, 1]; NaN is mapped to 0. */
+function clamp01(x: number): number {
+  // NaN fails both comparisons, so it falls through the ternary unchanged
+  // and is caught by the final check.
+  const bounded = x < 0 ? 0 : x > 1 ? 1 : x;
+  return Number.isNaN(bounded) ? 0 : bounded;
+}
+/**
+ * Recency component of the score: exp(-Δt / τ), clamped to [0, 1].
+ *
+ * @param lastAccessedAt Preferred timestamp string, parsed with Date.parse.
+ * @param createdAt Fallback timestamp when `lastAccessedAt` is missing,
+ *   empty, or unparseable.
+ * @param type Memory type used to pick τ; unknown types use the `context` τ.
+ * @param now Reference time. Timestamps in the future count as Δt = 0.
+ * @returns 1 for types with an infinite τ, 0.5 when no usable timestamp
+ *   exists, otherwise the exponential decay value.
+ */
+function recencyDecay(
+  lastAccessedAt: string | null | undefined,
+  createdAt: string | null | undefined,
+  type: string,
+  now: Date,
+): number {
+  // Own-property lookup: a type such as "constructor" must not resolve to an
+  // inherited Object.prototype member (which is not a number at all).
+  const knownType = Object.prototype.hasOwnProperty.call(
+    TYPE_TAU_SECONDS,
+    type,
+  );
+  const tau =
+    (knownType ? TYPE_TAU_SECONDS[type] : undefined) ??
+    TYPE_TAU_SECONDS.context!;
+  if (!Number.isFinite(tau)) return 1; // Preferences never decay.
+  // Try last access first, then creation time. An empty or unparseable
+  // last-access value no longer hides a perfectly good created_at.
+  let t = Number.NaN;
+  for (const candidate of [lastAccessedAt, createdAt]) {
+    if (!candidate) continue;
+    t = Date.parse(candidate);
+    if (!Number.isNaN(t)) break;
+  }
+  if (Number.isNaN(t)) return 0.5; // Unknown timestamp: neutral.
+  const dtSec = Math.max(0, (now.getTime() - t) / 1000);
+  return clamp01(Math.exp(-dtSec / tau));
+}
+/**
+ * Normalise an importance value to the range [0.1, 1].
+ *
+ * @param raw Entity importance on a 1–10 scale. `null`, `undefined`, and NaN
+ *   are treated as "not provided".
+ * @param type Memory type used to look up the default when `raw` is not
+ *   provided; unknown types default to 5.
+ * @returns The value clamped to [1, 10] and divided by 10.
+ */
+function importanceNorm(raw: number | null | undefined, type: string): number {
+  // Own-property lookup so a type like "constructor" cannot pull a function
+  // off Object.prototype and produce NaN below.
+  const typeDefault = Object.prototype.hasOwnProperty.call(
+    TYPE_IMPORTANCE_DEFAULT,
+    type,
+  )
+    ? TYPE_IMPORTANCE_DEFAULT[type]
+    : undefined;
+  // NaN has typeof "number" and fails both clamp comparisons, so it must be
+  // rejected explicitly or it propagates into the final score and the sort.
+  const provided = typeof raw === "number" && !Number.isNaN(raw);
+  const v = provided ? raw : (typeDefault ?? 5);
+  return Math.min(10, Math.max(1, v)) / 10;
+}
+// ---------------------------------------------------------------------------
+// Public API
+// ---------------------------------------------------------------------------
+/**
+ * Score a list of entities using Park's recency × importance × relevance
+ * formula. Returns a new array that wraps each entity with its three
+ * component values and the combined score, sorted by `score` descending.
+ * The input array itself is not reordered.
+ *
+ * Ties on `score` are broken by `created_at` descending (newer first); a
+ * missing or unparseable `created_at` is treated as timestamp 0.
+ *
+ * @param entities Candidates to score. `id` is only used to look up the
+ *   caller-supplied relevance.
+ * @param options Weight overrides, injectable clock, and relevance map.
+ * @returns Scored wrappers, best first.
+ */
+export function rescore<T extends ParkInput & { id?: string }>(
+  entities: T[],
+  options: ParkOptions = {},
+): ParkScored<T>[] {
+  // Resolve each weight with `??` instead of object spread: an override that
+  // is present but `undefined` (legal under `Partial`) would otherwise replace
+  // the default and turn every score into NaN.
+  const overrides = options.weights ?? {};
+  const w = {
+    relevance: overrides.relevance ?? DEFAULT_WEIGHTS.relevance,
+    recency: overrides.recency ?? DEFAULT_WEIGHTS.recency,
+    importance: overrides.importance ?? DEFAULT_WEIGHTS.importance,
+  };
+  const now = options.now ?? new Date();
+  const relevanceMap = options.relevance ?? new Map<string, number>();
+  const scored: ParkScored<T>[] = entities.map((entity) => {
+    // Entities with no entry in the map fall back to the neutral 0.5.
+    const relevance = clamp01(relevanceMap.get(entity.id ?? "") ?? 0.5);
+    const recency = recencyDecay(
+      entity.last_accessed_at,
+      entity.created_at,
+      entity.type,
+      now,
+    );
+    const importance = importanceNorm(entity.importance, entity.type);
+    const score =
+      w.relevance * relevance + w.recency * recency + w.importance * importance;
+    return { entity, relevance, recency, importance, score };
+  });
+  // Missing or unparseable created_at collapses to 0 so the comparator always
+  // returns a real number.
+  const createdMs = (s: ParkScored<T>): number =>
+    (s.entity.created_at ? Date.parse(s.entity.created_at) : 0) || 0;
+  scored.sort((a, b) => {
+    if (b.score !== a.score) return b.score - a.score;
+    return createdMs(b) - createdMs(a);
+  });
+  return scored;
+}
+// ---------------------------------------------------------------------------
+// Rank-to-relevance helper (Phase 1 hybrid retrieval bridge)
+// ---------------------------------------------------------------------------
+/**
+ * Convert an ordered list of candidates from a hybrid retriever (vector +
+ * lexical + RRF fusion) into a Map<id, relevance> that Park's rescorer can
+ * consume. Uses an exponential falloff so the top result dominates while
+ * deeper candidates still register a non-zero signal.
+ *
+ *   relevance(rank) = exp(-rank / decay)
+ *
+ * With `decay=10`, rank 0 is 1.00, rank 10 is 0.37, rank 30 is 0.05.
+ *
+ * Phase 2 may replace this with raw RRF scores from the edge function once
+ * the RPC starts surfacing them per-entity. The position-based fallback
+ * here is intentional: it makes the scorer work today without a database
+ * schema change.
+ *
+ * @param ranked Candidates in best-first order. If an id occurs more than
+ *   once, its first (best) position wins.
+ * @param decay Falloff constant in ranks. Values that are not greater than
+ *   zero (including NaN) are replaced by the default of 10.
+ * @returns Relevance in [0, 1] keyed by entity id.
+ */
+export function relevanceFromRank<T extends { id: string }>(
+  ranked: T[],
+  decay = 10,
+): Map<string, number> {
+  // A zero, negative, or NaN decay has no meaningful falloff curve (0 maps
+  // every rank to 0, negatives clamp every rank to 1), so use the default.
+  const effectiveDecay = decay > 0 ? decay : 10;
+  const out = new Map<string, number>();
+  ranked.forEach((entity, rank) => {
+    // Keep the best rank for a repeated id; an unconditional `set` would let
+    // the worst occurrence overwrite it.
+    if (out.has(entity.id)) return;
+    out.set(entity.id, clamp01(Math.exp(-rank / effectiveDecay)));
+  });
+  return out;
+}
+// ---------------------------------------------------------------------------
+// Token budget — greedy fill (§6.2 step 5)
+// ---------------------------------------------------------------------------
+/** Text fields that fitToBudget charges against the token budget. */
+interface TokenBudgetable {
+  title?: string | null;
+  content?: string | null;
+}
+// Rough characters-per-token ratio used by approxTokens (no real tokenizer).
+const APPROX_CHARS_PER_TOKEN = 4;
+// Only this many leading characters of `content` are counted per entity.
+const CONTENT_PREVIEW_CHARS = 200;
+/** Estimate the token count of `s` from its length, rounding up. */
+function approxTokens(s: string): number {
+  const charCount = s.length;
+  return Math.ceil(charCount / APPROX_CHARS_PER_TOKEN);
+}
+/**
+ * Fill a token budget with the leading entries of an already-sorted list.
+ *
+ * Each entry costs the approximate tokens of its title (first 300 chars)
+ * plus its content preview (first 200 chars) plus a fixed 4-token marker
+ * overhead. The walk stops at the first entry that does not fit, so the
+ * result is always a prefix of `scored`. If even the first entry exceeds the
+ * budget it is still returned on its own.
+ *
+ * @param scored Entries in best-first order (e.g. the output of `rescore`).
+ * @param budgetTokens Token allowance. Zero, negative, or NaN yields `[]`.
+ * @returns The prefix of `scored` that fits.
+ */
+export function fitToBudget<
+  T extends ParkInput & TokenBudgetable & { id?: string },
+>(scored: ParkScored<T>[], budgetTokens: number): ParkScored<T>[] {
+  // `!(x > 0)` also rejects NaN. The plain `x <= 0` test lets NaN through,
+  // after which every `used + cost > budget` check is false and the budget
+  // is silently ignored.
+  if (!(budgetTokens > 0)) return [];
+  const titleCharLimit = 300;
+  const markerOverheadTokens = 4;
+  const out: ParkScored<T>[] = [];
+  let used = 0;
+  for (const s of scored) {
+    const title = (s.entity.title ?? "").slice(0, titleCharLimit);
+    const content = (s.entity.content ?? "").slice(0, CONTENT_PREVIEW_CHARS);
+    const cost =
+      approxTokens(title) + approxTokens(content) + markerOverheadTokens;
+    if (used + cost > budgetTokens) {
+      // Always include at least the top result, even if oversized.
+      if (out.length === 0) out.push(s);
+      break;
+    }
+    out.push(s);
+    used += cost;
+  }
+  return out;
+}

package/src/memory-session.ts ADDED Viewed

@@ -0,0 +1,61 @@
+/**
+ * Session-scoped working memory helpers (plan §12 Phase 1, §13.1 D3).
+ *
+ * Working memories live alongside long-term memories in `knowledge_entities`,
+ * distinguished only by their `scope` value. The scope literal `'session'` is
+ * an alias the caller passes to `harmony_remember`; this module resolves it to
+ * the concrete `session:<agent_session_id>` form against the active session.
+ *
+ * Composite tenancy `(workspace_id, agent_session_id)` is enforced by RLS at
+ * the workspace_id level (existing) plus the session id embedded in the scope
+ * string. Cross-workspace leakage is impossible because workspace membership
+ * gates the row in the first place.
+ */
+// Prefix of a resolved session scope value; the session id follows the colon.
+const SESSION_SCOPE_PREFIX = "session:";
+/**
+ * Turn the caller's `scope` argument into the value that gets stored.
+ *
+ * - The alias `'session'` becomes `session:<activeSessionId>`.
+ * - Every other string, and `undefined`, is handed back untouched so the
+ *   caller's own defaulting and existing scope values keep working.
+ *
+ * @throws Error when the alias is used but no session id is available.
+ */
+export function resolveSessionScope(
+  requested: string | undefined,
+  activeSessionId: string | undefined,
+): string | undefined {
+  const isAlias = requested === "session";
+  if (!isAlias) {
+    return requested;
+  }
+  if (activeSessionId) {
+    return SESSION_SCOPE_PREFIX + activeSessionId;
+  }
+  const message =
+    "scope='session' requires an active agent session. " +
+    "Call harmony_start_agent_session before storing session memories.";
+  throw new Error(message);
+}
+/**
+ * Report whether `scope` is a resolved session scope, i.e. the
+ * `session:` prefix followed by at least one character. The unresolved
+ * alias `'session'`, the bare prefix, and non-string values all yield
+ * `false`.
+ */
+export function isSessionScope(scope: string | undefined | null): boolean {
+  if (typeof scope !== "string") return false;
+  if (!scope.startsWith(SESSION_SCOPE_PREFIX)) return false;
+  const idPart = scope.slice(SESSION_SCOPE_PREFIX.length);
+  return idPart.length > 0;
+}
+/**
+ * Produce the stored scope value for a session id the caller already holds,
+ * skipping alias resolution (e.g. when retrieval prepends working memories
+ * for the active session).
+ */
+export function sessionScopeFor(agentSessionId: string): string {
+  const scope = SESSION_SCOPE_PREFIX.concat(agentSessionId);
+  return scope;
+}

package/src/prompt-builder.ts CHANGED Viewed

@@ -5,6 +5,23 @@
  * context extraction, and variant-specific instructions.
  */
+import { createHash, randomUUID } from "node:crypto";
+/**
+ * Current prompt template version. Bump when the structural framing changes
+ * (sections added/removed/reordered) so cohort analysis can compare apples
+ * to apples. Copied onto each generated prompt as its `version` field.
+ */
+export const PROMPT_TEMPLATE_VERSION = 1;
+/**
+ * Hash a generated prompt so identical prompt bodies share one cohort key
+ * for outcome-feedback analysis.
+ *
+ * @param prompt Prompt text to hash.
+ * @returns Lowercase hex SHA-256 digest (64 characters).
+ */
+export function computeContentHash(prompt: string): string {
+  const hasher = createHash("sha256");
+  hasher.update(prompt);
+  return hasher.digest("hex");
+}
 // Types
 export type PromptVariant = "analysis" | "draft" | "execute";
 export type LabelCategory =
@@ -52,6 +69,12 @@ export interface GeneratedPrompt {
   };
   tokenEstimate: number;
   assemblyId?: string;
+  /** Local UUID identifying this generated snapshot. Persisted on prompt_history. */
+  promptId: string;
+  /** SHA-256 of the generated prompt body — cohort key for variant analysis. */
+  contentHash: string;
+  /** Template version that produced this prompt. */
+  version: number;
 }
 // Label name to category mapping
@@ -533,6 +556,9 @@ Keep \`currentTask\` specific (e.g., "Refactoring auth middleware" not "Working
     },
     tokenEstimate: estimateTokens(prompt),
     ...(assemblyId && { assemblyId }),
+    promptId: randomUUID(),
+    contentHash: computeContentHash(prompt),
+    version: PROMPT_TEMPLATE_VERSION,
   };
 }
@@ -694,3 +720,70 @@ export function getAvailableCategories(): LabelCategory[] {
 export function getAvailableVariants(): PromptVariant[] {
   return ["analysis", "draft", "execute"];
 }
+// ─── Variant proposal (logged-only — no auto-commit) ──────────────────
+/** Cohort row shape consumed by {@link proposePromptVariant}. */
+export interface PromptCohortRow {
+  /**
+   * Final agent session status. A row counts as a success only when this is
+   * "completed", progress is at least 100, and there were no blockers.
+   */
+  status: string | null;
+  /** Final progress percent recorded on the linked session, when present (null counts as 0). */
+  progressPercent: number | null;
+  /** Whether the linked session ended with non-empty blockers. */
+  hadBlockers: boolean;
+}
+/** Result of {@link proposePromptVariant} when a reframing is suggested. */
+export interface PromptVariantSuggestion {
+  /** The hash the suggestion was computed for (echoed from the input). */
+  contentHash: string;
+  /** Number of cohort rows that were evaluated. */
+  cohortSize: number;
+  /** Successful rows divided by `cohortSize`; always below the completion threshold here. */
+  completionRate: number;
+  /** Human-readable suggestion for how to reframe the prompt. */
+  framingHint: string;
+}
+// Cohorts smaller than this never produce a suggestion.
+const VARIANT_MIN_COHORT = 10;
+// A completion rate at or above this value is considered acceptable.
+const VARIANT_COMPLETION_THRESHOLD = 0.4;
+/**
+ * Look at how sessions that used a given prompt (by content hash) turned out
+ * and, if they mostly failed to finish, suggest a different framing.
+ *
+ * A row is a success when its status is "completed", its progress is at
+ * least 100, and it had no blockers. Nothing is suggested (null) when the
+ * hash is empty, the cohort has fewer than VARIANT_MIN_COHORT rows, or the
+ * success rate reaches VARIANT_COMPLETION_THRESHOLD.
+ *
+ * Per the AGP-P2 locked decision, this is logged-only — callers may surface
+ * the suggestion to humans, but no auto-commit of new templates is allowed.
+ *
+ * @param contentHash Cohort key of the prompt under review.
+ * @param fetchCohort Async loader returning one row per session that
+ *   consumed a prompt with this hash; injected so this module needs no API
+ *   client.
+ * @returns A suggestion, or null when no change is proposed.
+ */
+export async function proposePromptVariant(
+  contentHash: string,
+  fetchCohort: (hash: string) => Promise<PromptCohortRow[]>,
+): Promise<PromptVariantSuggestion | null> {
+  if (!contentHash) return null;
+  const rows = await fetchCohort(contentHash);
+  if (!rows || rows.length < VARIANT_MIN_COHORT) return null;
+  const cohortSize = rows.length;
+  // Single pass: a row with blockers can never be a success, so the two
+  // tallies are mutually exclusive.
+  let succeeded = 0;
+  let blocked = 0;
+  for (const row of rows) {
+    if (row.hadBlockers) {
+      blocked += 1;
+    } else if (
+      row.status === "completed" &&
+      (row.progressPercent ?? 0) >= 100
+    ) {
+      succeeded += 1;
+    }
+  }
+  const completionRate = succeeded / cohortSize;
+  if (completionRate >= VARIANT_COMPLETION_THRESHOLD) return null;
+  let framingHint: string;
+  if (blocked / cohortSize >= 0.4) {
+    framingHint =
+      "Cohort hits frequent blockers — try a more diagnostic framing (require root-cause + repro before any fix).";
+  } else {
+    framingHint =
+      "Cohort frequently stalls without finishing — try a more action-forcing framing (smaller subtasks, explicit DoD checklist).";
+  }
+  return { contentHash, cohortSize, completionRate, framingHint };
+}