npm - @oscharko-dev/keiko-memory-capture - Versions diffs - 0.2.0 - Mend

@oscharko-dev/keiko-memory-capture 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

package/dist/.tsbuildinfo +1 -0
package/dist/_constants.d.ts +2 -0
package/dist/_constants.d.ts.map +1 -0
package/dist/_constants.js +6 -0
package/dist/_envelopes.d.ts +30 -0
package/dist/_envelopes.d.ts.map +1 -0
package/dist/_envelopes.js +67 -0
package/dist/capture-safety.d.ts +4 -0
package/dist/capture-safety.d.ts.map +1 -0
package/dist/capture-safety.js +17 -0
package/dist/capture.d.ts +4 -0
package/dist/capture.d.ts.map +1 -0
package/dist/capture.js +66 -0
package/dist/errors.d.ts +6 -0
package/dist/errors.d.ts.map +1 -0
package/dist/errors.js +18 -0
package/dist/index.d.ts +7 -0
package/dist/index.d.ts.map +1 -0
package/dist/index.js +14 -0
package/dist/intent-ambient.d.ts +3 -0
package/dist/intent-ambient.d.ts.map +1 -0
package/dist/intent-ambient.js +112 -0
package/dist/intent-explicit.d.ts +6 -0
package/dist/intent-explicit.d.ts.map +1 -0
package/dist/intent-explicit.js +204 -0
package/dist/intent-workflow.d.ts +3 -0
package/dist/intent-workflow.d.ts.map +1 -0
package/dist/intent-workflow.js +69 -0
package/dist/policy.d.ts +12 -0
package/dist/policy.d.ts.map +1 -0
package/dist/policy.js +43 -0
package/dist/salience.d.ts +13 -0
package/dist/salience.d.ts.map +1 -0
package/dist/salience.js +268 -0
package/dist/scope-inference.d.ts +9 -0
package/dist/scope-inference.d.ts.map +1 -0
package/dist/scope-inference.js +45 -0
package/dist/secret-patterns.d.ts +3 -0
package/dist/secret-patterns.d.ts.map +1 -0
package/dist/secret-patterns.js +124 -0
package/dist/types.d.ts +61 -0
package/dist/types.d.ts.map +1 -0
package/dist/types.js +6 -0
package/dist/version.d.ts +2 -0
package/dist/version.d.ts.map +1 -0
package/dist/version.js +1 -0
package/package.json +31 -0

package/dist/intent-explicit.js ADDED Viewed

@@ -0,0 +1,204 @@
+// Explicit-intent extractors for keiko-memory-capture (Epic #204 child #207).
+//
+// Each `tryExtract*` is a pure function that returns either a CaptureOutcome (one of: candidate,
+// update, forget, supersession, rejected) or `null` for "this text is not this intent kind".
+// The top-level capture function in capture.ts probes them in a fixed order — first non-null
+// wins. Regex patterns are intentionally narrow: ambiguous matches return null so the next
+// extractor (or the no-intent fallthrough) gets a chance.
+//
+// Pure: no clock, no randomness, no IO. All time and IDs come from CaptureContext.
+import { buildForget, buildProposal, buildUpdate } from "./_envelopes.js";
+import { applyPolicy } from "./policy.js";
+import { inferScopeFromContext } from "./scope-inference.js";
+import { scanForSecrets } from "./secret-patterns.js";
+// ─── Regex catalogue (narrow, anchored, single-quantifier) ────────────────────
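+// All patterns are anchored and use a single open or bounded quantifier. The phrase trailing
+// the imperative is captured lazily ON A SINGLE LINE: with no `s` flag `.` cannot cross a newline,
+// and with no `m` flag `$` only matches at the end of the input. A newline inside or after the
+// captured phrase that is followed by further text therefore makes the whole pattern fail (no
+// match, rather than a truncated one) — this prevents a multi-line paste from being absorbed
+// into one body.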
+const REMEMBER_RE = /^\s*remember(?:\s+that)?\s+(.+?)\s*$/i;
+const REMEMBER_ABOUT_RE = /^\s*remember\s+about\s+(?:this\s+(?:project|workspace)[:,\s]+)?(.+?)\s*$/i;
+const FORGET_RE = /^\s*forget(?:\s+about)?\s+(.+?)\s*$/i;
+const UPDATE_RE = /^\s*update\s+(?:memory|the\s+memory)\s+about\s+(.+?)\s+(?:to\s+be|with|:)\s+(.+?)\s*$/i;
+const ACTUALLY_RE = /^\s*actually,?\s+(.+?)\s*$/i;
+const CORRECTION_LABEL_RE = /^\s*correction:\s*(.+?)\s*$/i;
+const THATS_WRONG_RE = /^\s*that(?:'s|\s+is)\s+wrong[,.]?\s+(.+?)\s+(?:is|are|should\s+be)\s+(.+?)\s*$/i;
// Secret gate shared by the explicit extractors. Runs `scanForSecrets` over `body` together with
// the policy's customer-identifier matchers (an empty list when none are configured). A non-null
// scan result becomes the `reason` of a `rejected` outcome; a clean body yields null so the
// caller can carry on. Body length is not checked here.
function rejectIfUnsafe(body, policy) {
    const matchers = policy.customerIdentifierMatchers ?? [];
    const finding = scanForSecrets(body, matchers);
    return finding === null ? null : { kind: "rejected", reason: finding };
}
// Resolves the scope a new envelope should be filed under. Only the `scopeKind` and
// `allowGlobalScope` policy fields that are actually set are forwarded to
// `inferScopeFromContext`. Returns `{ ok: true, scope }` on success, or `{ ok: false, outcome }`
// carrying a ready-made `scope-not-resolvable` rejection when inference yields null.
function scopeOrReject(context, policy) {
    const kindHint = policy.scopeKind === undefined ? {} : { scopeKind: policy.scopeKind };
    const globalHint = policy.allowGlobalScope === undefined
        ? {}
        : { allowGlobalScope: policy.allowGlobalScope };
    const scope = inferScopeFromContext(context, { ...kindHint, ...globalHint });
    if (scope !== null) {
        return { ok: true, scope };
    }
    return { ok: false, outcome: { kind: "rejected", reason: "scope-not-resolvable" } };
}
// Returns the first entry of a resolver result, or null when the list is empty or its first
// slot holds undefined/null. The explicit check exists purely to narrow the indexed access.
function firstResolvedId(matches) {
    const first = matches[0];
    if (first === undefined || first === null) {
        return null;
    }
    return first;
}
// Looks up which stored memory a free-text `target` refers to, using the optional
// `policy.resolver(target, scope)` callback. Result kinds:
//   - "none":      no resolver configured, no match, or the single slot is empty;
//   - "ambiguous": more than one match;
//   - "unique":    exactly one match, returned as `memoryId`.
function resolveTarget(policy, target, scope) {
    const lookup = policy.resolver;
    if (lookup === undefined) {
        return { kind: "none" };
    }
    const hits = lookup(target, scope);
    if (hits.length > 1) {
        return { kind: "ambiguous" };
    }
    if (hits.length === 0) {
        return { kind: "none" };
    }
    // Defensive narrow of the indexed access: an empty first slot counts as "no match".
    const only = hits[0];
    if (only === undefined || only === null) {
        return { kind: "none" };
    }
    return { kind: "unique", memoryId: only };
}
// ─── tryExtractRemember ──────────────────────────────────────────────────────
// Handles "remember about [this project|workspace:] X", "remember that X" and "remember X".
// The "about" form is tried first; the plain form only runs when it did not match. Returns null
// when neither matches, a `rejected` outcome when the secret scan or scope resolution fails, and
// otherwise a `candidate` wrapping a preference-type proposal with sourceKind
// "explicit-user-instruction" and confidence 1.0.
// NOTE(review): the optional "this project/workspace" prefix is consumed by a non-capturing
// group and is not turned into a scope hint here — scope comes from `policy`/`context` only.
export function tryExtractRemember(text, context, policy = {}) {
    const match = REMEMBER_ABOUT_RE.exec(text) ?? REMEMBER_RE.exec(text);
    const body = match?.[1];
    if (body === undefined) {
        return null;
    }
    const unsafe = rejectIfUnsafe(body, policy);
    if (unsafe !== null) {
        return unsafe;
    }
    const scoped = scopeOrReject(context, policy);
    if (!scoped.ok) {
        return scoped.outcome;
    }
    // Forward the default sensitivity only when the caller actually configured one.
    const policyInput = policy.defaultSensitivity === undefined
        ? {}
        : { defaultSensitivity: policy.defaultSensitivity };
    const { sensitivity, requiresApproval } = applyPolicy(body, policyInput);
    const draft = {
        context,
        scope: scoped.scope,
        body,
        type: "preference",
        sensitivity,
        sourceKind: "explicit-user-instruction",
    };
    return { kind: "candidate", proposal: buildProposal(draft, 1.0), requiresApproval };
}
// ─── tryExtractForget ────────────────────────────────────────────────────────
// Handles "forget [about] X". Returns null when the text is not a forget request or when the
// resolver finds no matching memory, `rejected: scope-not-resolvable` when no scope can be
// inferred, `rejected: ambiguous-forget` when several memories match, and otherwise a `forget`
// outcome that always asks for confirmation.
// NOTE(review): unlike the remember/update/correction paths, `target` is not passed through the
// secret scan, yet it is copied into the operation as `reason` — confirm this is intended.
export function tryExtractForget(text, context, policy = {}) {
    const target = FORGET_RE.exec(text)?.[1];
    if (target === undefined) {
        return null;
    }
    const scoped = scopeOrReject(context, policy);
    if (!scoped.ok) {
        return scoped.outcome;
    }
    const located = resolveTarget(policy, target, scoped.scope);
    switch (located.kind) {
        case "none":
            return null;
        case "ambiguous":
            return { kind: "rejected", reason: "ambiguous-forget" };
        default: {
            const operation = buildForget({ context, memoryId: located.memoryId, reason: target });
            return { kind: "forget", operation, requiresConfirmation: true };
        }
    }
}
// ─── tryExtractUpdate ────────────────────────────────────────────────────────
// Handles "update [the] memory about X to be|with|: Y". The new value Y is secret-scanned before
// anything else; X is then resolved to a stored memory. Returns null when the text does not
// match or nothing resolves, a `rejected` outcome for secrets, unresolvable scope or an
// ambiguous target, and otherwise an `update` outcome whose operation patches the body with Y.
export function tryExtractUpdate(text, context, policy = {}) {
    const parts = UPDATE_RE.exec(text);
    if (parts === null) {
        return null;
    }
    const [, target, newValue] = parts;
    if (target === undefined || newValue === undefined) {
        return null;
    }
    const unsafe = rejectIfUnsafe(newValue, policy);
    if (unsafe !== null) {
        return unsafe;
    }
    const scoped = scopeOrReject(context, policy);
    if (!scoped.ok) {
        return scoped.outcome;
    }
    const located = resolveTarget(policy, target, scoped.scope);
    switch (located.kind) {
        case "none":
            return null;
        case "ambiguous":
            return { kind: "rejected", reason: "ambiguous-update" };
        default: {
            const operation = buildUpdate({
                context,
                memoryId: located.memoryId,
                bodyPatch: newValue,
            });
            return { kind: "update", operation };
        }
    }
}
+// ─── tryExtractCorrection ─────────────────────────────────────────────────────
+// Emits a correction-type proposal. We do NOT emit a MemorySupersession envelope here:
+// supersession requires knowing the OLD memory id, which requires a resolver lookup analogous
+// to update/forget. A correction proposal is the lowest-friction default — the acceptance
+// layer (#212) can elevate it to a supersession when it knows the prior fact.
// Pulls the corrected statement out of `text`, trying the three correction phrasings in order:
// "actually[,] X", "correction: X", then "that's/that is wrong, X is|are|should be Y". The
// first two return X verbatim; the third is rebuilt as "X is Y". Returns null when none match.
// NOTE(review): the rebuilt form always uses "is", even when the user wrote "are"/"should be".
function extractCorrectionBody(text) {
    for (const pattern of [ACTUALLY_RE, CORRECTION_LABEL_RE]) {
        const captured = pattern.exec(text)?.[1];
        if (captured !== undefined) {
            return captured;
        }
    }
    const wrong = THATS_WRONG_RE.exec(text);
    if (wrong === null) {
        return null;
    }
    const [, subject, value] = wrong;
    if (subject === undefined || value === undefined) {
        return null;
    }
    return `${subject} is ${value}`;
}
// Turns a correction phrase into a correction-type proposal (sourceKind "accepted-correction",
// confidence 1.0). Returns null when `text` is not a correction, a `rejected` outcome when the
// secret scan or scope resolution fails, and otherwise a `candidate` whose `requiresApproval`
// flag comes from the sensitivity policy.
export function tryExtractCorrection(text, context, policy = {}) {
    const body = extractCorrectionBody(text);
    if (body === null) {
        return null;
    }
    const unsafe = rejectIfUnsafe(body, policy);
    if (unsafe !== null) {
        return unsafe;
    }
    const scoped = scopeOrReject(context, policy);
    if (!scoped.ok) {
        return scoped.outcome;
    }
    // Forward the default sensitivity only when the caller actually configured one.
    const policyInput = policy.defaultSensitivity === undefined
        ? {}
        : { defaultSensitivity: policy.defaultSensitivity };
    const { sensitivity, requiresApproval } = applyPolicy(body, policyInput);
    const draft = {
        context,
        scope: scoped.scope,
        body,
        type: "correction",
        sensitivity,
        sourceKind: "accepted-correction",
    };
    return { kind: "candidate", proposal: buildProposal(draft, 1.0), requiresApproval };
}

package/dist/intent-workflow.d.ts ADDED Viewed

@@ -0,0 +1,3 @@
+import type { CaptureContext, CaptureOutcome, CapturePolicyOptions, WorkflowOutcomeInput } from "./types.js";
+/**
+ * Converts one workflow run outcome into capture outcomes.
+ *
+ * A `failed` outcome yields an empty array. A structured report that is blank after trimming
+ * yields a single `rejected` outcome with reason `empty-content`. Otherwise exactly one outcome
+ * is returned: a candidate (type `correction` for `corrected` runs, `semantic-fact` otherwise)
+ * or a rejection from the secret scan / scope resolution.
+ *
+ * @param outcome The workflow run result (kind, structured report, run id, capture time).
+ * @param context Capture context forwarded to scope inference and proposal construction.
+ * @param policy Optional capture policy; defaults to an empty policy.
+ */
+export declare function extractWorkflowOutcomeCandidates(outcome: WorkflowOutcomeInput, context: CaptureContext, policy?: CapturePolicyOptions): readonly CaptureOutcome[];
+//# sourceMappingURL=intent-workflow.d.ts.map

package/dist/intent-workflow.d.ts.map ADDED Viewed

	@@ -0,0 +1 @@
1	+ {"version":3,"file":"intent-workflow.d.ts","sourceRoot":"","sources":["../src/intent-workflow.ts"],"names":[],"mappings":"AAiBA,OAAO,KAAK,EACV,cAAc,EACd,cAAc,EACd,oBAAoB,EACpB,oBAAoB,EACrB,MAAM,YAAY,CAAC;AA+DpB,wBAAgB,gCAAgC,CAC9C,OAAO,EAAE,oBAAoB,EAC7B,OAAO,EAAE,cAAc,EACvB,MAAM,GAAE,oBAAyB,GAChC,SAAS,cAAc,EAAE,CAU3B"}

package/dist/intent-workflow.js ADDED Viewed

@@ -0,0 +1,69 @@
+// Workflow-outcome candidate extractor for keiko-memory-capture (Epic #204 child #207).
+//
+// Workflow outcomes are the second canonical capture trigger: a workflow run that completed
+// (success) or that the user reviewed and corrected (corrected) produces a candidate memory.
+// Failed runs are intentionally NOT a learning surface — they would teach the system from
+// incomplete information, which inverts the governance contract.
+//
+// Output sensitivity follows the policy classifier on the structured report. Workflow scope is
+// preferred when the workflow definition id is available (so the memory rides with the workflow,
+// not with a single conversation); we fall back to user scope when no workflow id is present.
+import { buildProposal } from "./_envelopes.js";
+import { applyPolicy } from "./policy.js";
+import { inferScopeFromContext } from "./scope-inference.js";
+import { scanForSecrets } from "./secret-patterns.js";
+// Confidence for workflow-derived candidates. Lower than 1.0 because the system inferred the
+// learning rather than the user stating it explicitly. The retrieval layer (#210) is expected
+// to weight by provenance.confidence; a lower value means a workflow-derived memory ranks below
+// an equivalent explicit-user-instruction memory.
+const WORKFLOW_CONFIDENCE = 0.6;
// Picks the scope for a workflow-derived memory. An explicit `policy.scopeKind` wins; without
// one the hint is "workflow" when the context carries a workflow definition id and "user"
// otherwise. `allowGlobalScope` is forwarded only when set. Returns whatever
// `inferScopeFromContext` returns (null when no scope can be resolved).
function resolveWorkflowScope(context, policy) {
    let scopeKind = policy.scopeKind;
    if (scopeKind === undefined || scopeKind === null) {
        scopeKind = context.workflowDefinitionId !== undefined ? "workflow" : "user";
    }
    const globalHint = policy.allowGlobalScope === undefined
        ? {}
        : { allowGlobalScope: policy.allowGlobalScope };
    return inferScopeFromContext(context, { scopeKind, ...globalHint });
}
// Builds the single outcome for a workflow report body. Order of checks: secret scan (rejects
// with the scanner's reason), scope resolution (rejects with `scope-not-resolvable`), then
// sensitivity policy. The proposal is stamped with the run id, uses `capturedAt` for both the
// capture time and `validFrom`, and carries WORKFLOW_CONFIDENCE. Corrections are tagged with
// sourceKind "accepted-correction"; everything else with "workflow-outcome".
function emitWorkflowCandidate(body, type, runId, capturedAt, context, policy) {
    const finding = scanForSecrets(body, policy.customerIdentifierMatchers ?? []);
    if (finding !== null) {
        return { kind: "rejected", reason: finding };
    }
    const scope = resolveWorkflowScope(context, policy);
    if (scope === null) {
        return { kind: "rejected", reason: "scope-not-resolvable" };
    }
    // Forward the default sensitivity only when the caller actually configured one.
    const policyInput = policy.defaultSensitivity === undefined
        ? {}
        : { defaultSensitivity: policy.defaultSensitivity };
    const { sensitivity, requiresApproval } = applyPolicy(body, policyInput);
    const draft = {
        context,
        scope,
        body,
        type,
        sensitivity,
        sourceKind: type === "correction" ? "accepted-correction" : "workflow-outcome",
        capturedAt,
        validFrom: capturedAt,
        sourceWorkflowRunId: runId,
    };
    return {
        kind: "candidate",
        proposal: buildProposal(draft, WORKFLOW_CONFIDENCE),
        requiresApproval,
    };
}
// Entry point for workflow-outcome capture. `failed` runs produce an empty list (nothing is
// learned from them). A report that is blank after trimming produces one `empty-content`
// rejection. Any other run produces exactly one outcome from `emitWorkflowCandidate`, typed
// "correction" for `corrected` runs and "semantic-fact" otherwise. The report text, run id and
// capture time are read from `outcome`; `context` and `policy` are passed through unchanged.
export function extractWorkflowOutcomeCandidates(outcome, context, policy = {}) {
    const { outcomeKind } = outcome;
    if (outcomeKind === "failed") {
        return [];
    }
    const report = outcome.structuredReport.trim();
    if (report.length === 0) {
        return [{ kind: "rejected", reason: "empty-content" }];
    }
    const memoryType = outcomeKind === "corrected" ? "correction" : "semantic-fact";
    const result = emitWorkflowCandidate(report, memoryType, outcome.runId, outcome.capturedAt, context, policy);
    return [result];
}

package/dist/policy.d.ts ADDED Viewed

@@ -0,0 +1,12 @@
+import type { MemorySensitivity } from "@oscharko-dev/keiko-contracts/memory";
+/** Options accepted by `applyPolicy`. */
+interface ApplyPolicyInput {
+    /**
+     * Sensitivity used when no confidential signal fires; `"public"` when omitted.
+     * `applyPolicy` throws if this is `"restricted"`.
+     */
+    readonly defaultSensitivity?: MemorySensitivity;
+}
+/** Result of `applyPolicy` for one memory body. */
+export interface PolicyDecision {
+    /** Sensitivity class assigned to the body. */
+    readonly sensitivity: MemorySensitivity;
+    /** True whenever `sensitivity` is anything other than `"public"`. */
+    readonly requiresApproval: boolean;
+}
+/**
+ * Classifies `body`: returns `"confidential"` when a confidentiality marker, an email-shaped
+ * string or a phone-shaped digit run is found, otherwise `defaultSensitivity`
+ * (`"public"` when omitted).
+ */
+export declare function classifySensitivity(body: string, defaultSensitivity?: MemorySensitivity): MemorySensitivity;
+/**
+ * Returns the sensitivity for `body` plus whether the candidate needs user approval
+ * (any non-`"public"` sensitivity).
+ *
+ * @throws Error when `options.defaultSensitivity` is `"restricted"`.
+ */
+export declare function applyPolicy(body: string, options?: ApplyPolicyInput): PolicyDecision;
+export {};
+//# sourceMappingURL=policy.d.ts.map

package/dist/policy.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"policy.d.ts","sourceRoot":"","sources":["../src/policy.ts"],"names":[],"mappings":"AAiBA,OAAO,KAAK,EAAE,iBAAiB,EAAE,MAAM,sCAAsC,CAAC;AAU9E,UAAU,gBAAgB;IACxB,QAAQ,CAAC,kBAAkB,CAAC,EAAE,iBAAiB,CAAC;CACjD;AAED,MAAM,WAAW,cAAc;IAC7B,QAAQ,CAAC,WAAW,EAAE,iBAAiB,CAAC;IACxC,QAAQ,CAAC,gBAAgB,EAAE,OAAO,CAAC;CACpC;AAMD,wBAAgB,mBAAmB,CACjC,IAAI,EAAE,MAAM,EACZ,kBAAkB,GAAE,iBAA4B,GAC/C,iBAAiB,CAKnB;AAKD,wBAAgB,WAAW,CAAC,IAAI,EAAE,MAAM,EAAE,OAAO,GAAE,gBAAqB,GAAG,cAAc,CAQxF"}

package/dist/policy.js ADDED Viewed

@@ -0,0 +1,43 @@
+// Sensitivity classification + per-call policy decisions for keiko-memory-capture.
+//
+// Sensitivity is a SOURCE-side label (lives on MemoryProvenance per ADR-0019 contracts) — once
+// assigned, the audit and retention layers honour it without re-classifying. This module's job
+// is therefore two-fold:
+//
+//   1. Pick the right initial sensitivity for a body based on heuristic signals
+//      (contact data, explicit markers).
+//   2. Decide whether the resulting candidate must be gated behind explicit user approval before
+//      it can land in storage. ANY non-public sensitivity flips the approval flag — `confidential`
+//      requires a confirmation prompt; `restricted` is rejected upstream (see capture.ts).
+//
+// The heuristics are deliberately narrow (high precision over high recall) so the layer does not
+// reject benign user memories. The wider secret-rejection net lives in secret-patterns.ts; this
+// module's classifier covers PII-shaped content the secret scanner doesn't catch (email,
+// phone-shape numbers, marker words like "confidential").
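+// Linear single-character-class patterns; no nesting. The phone-shape pattern accepts an optional
+// leading +, one digit, then 6–14 further digits or separators (space, dash, dot) and a closing
+// digit (8–16 characters after the +), so the total digit run isn't long enough to trip the
+// secret scanner's PAN detector. It is unanchored, so other digit-and-separator runs of that
+// length (e.g. an ISO date such as 2024-01-15) also match. The email pattern is a conservative
+// local@host shape — anything resembling a routable address triggers `confidential`.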
+const EMAIL_RE = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/;
+const PHONE_RE = /\+?\d[\d\s.-]{6,14}\d/;
+const CONFIDENTIAL_MARKER_RE = /\b(confidential|internal\s+only|internal[:\s]|private[:\s])/i;
// Returns the sensitivity class for `body`. The marker, email and phone detectors are tried in
// that order; the first hit classifies the body as "confidential". When none fires the caller's
// `defaultSensitivity` is returned unchanged (default "public"), so the default acts as a floor
// for benign text and never demotes a body that triggered a detector. This function does not
// validate the default; `applyPolicy` is the one that refuses "restricted" (by throwing Error).
export function classifySensitivity(body, defaultSensitivity = "public") {
    const detectors = [CONFIDENTIAL_MARKER_RE, EMAIL_RE, PHONE_RE];
    const flagged = detectors.some((pattern) => pattern.test(body));
    return flagged ? "confidential" : defaultSensitivity;
}
// Produces the policy decision for `body`: its sensitivity and whether downstream must ask for
// approval (true for every sensitivity other than "public"). A default of "restricted" is a
// programmer error and is refused with a thrown Error before any classification happens.
export function applyPolicy(body, options = {}) {
    const { defaultSensitivity } = options;
    if (defaultSensitivity === "restricted") {
        throw new Error("policy.defaultSensitivity must not be 'restricted'; capture rejects restricted candidates upstream");
    }
    const sensitivity = classifySensitivity(body, defaultSensitivity);
    const requiresApproval = sensitivity !== "public";
    return { sensitivity, requiresApproval };
}

package/dist/salience.d.ts ADDED Viewed

@@ -0,0 +1,13 @@
+import type { CaptureOutcome, SalienceDeps, SalienceInput } from "./types.js";
+/** One element of the model's JSON array after shape validation, before contract mapping. */
+interface RawSalienceItem {
+    /** Candidate memory text; trimmed and size/secret-checked before it becomes a proposal. */
+    readonly body: string;
+    /** Loose type label from the model; unknown labels fall back to `semantic-fact`. */
+    readonly type: string;
+    /** Model-reported confidence; clamped into the salience band before use. */
+    readonly confidence: number;
+    /** Loose scope label; anything other than `project`/`workspace` is treated as `user`. */
+    readonly scope: string;
+    /** Keywords copied onto the resulting proposal. */
+    readonly tags: readonly string[];
+}
+export declare const SALIENCE_SYSTEM_PROMPT = "You extract durable memories from a chat turn so an assistant can remember the user across future conversations.\n\nReturn ONLY a JSON array (no prose, no markdown fences). Each element:\n{ \"body\": string, \"type\": string, \"confidence\": number, \"scope\": string, \"tags\": string[] }\n\nCapture ONLY facts the USER asserted about THEMSELVES or THEIR work that are durable and worth remembering: identity, stable preferences, project and technology facts, decisions, constraints, goals, environment, team, and recurring workflow lessons. Write each \"body\" as a concise, self-contained, third-person statement (e.g. \"The user is building a fintech app called Atlas in Rust with PostgreSQL\"). Identity statements should be canonicalised the same way every time, for example \"My name is Paul.\" / \"Hallo Keiko, ich bin Paul.\" -> \"The user's name is Paul.\".\n\nCapture LIBERALLY \u2014 the bar is low; when in doubt, include it.\n\nEXCLUDE: questions; one-off ephemeral task requests; anything the ASSISTANT said or suggested (the assistant message is context only, never a source of user facts); general world knowledge; and anything secret or credential-like (passwords, API keys, tokens, private keys).\n\n\"type\" is one of: identity, preference, fact, decision, constraint, goal, lesson, procedural. \"scope\" is one of: user (personal facts/preferences), project (project-specific facts), workspace. \"confidence\" is 0..1. \"tags\" is a short list of lowercase keywords.\n\nIf there is nothing durable to capture, return [].";
+/**
+ * Parses raw model output into validated items. Code fences are stripped, the first balanced
+ * JSON array is located and parsed, and elements with the wrong shape are dropped. Any failure
+ * (no array, invalid JSON, non-array result) yields an empty array rather than throwing.
+ */
+export declare function parseSalienceItems(raw: string): readonly RawSalienceItem[];
+/**
+ * Asks the model (one `deps.callModel` call) for durable facts in the user's turn and returns
+ * them as candidate outcomes. Resolves to an empty array without calling the model when the
+ * user text is blank. Items that are empty, oversize, secret-bearing, unscopable or
+ * near-duplicates of `input.existingBodies` (or of an earlier accepted item) are dropped, and
+ * the number of accepted candidates per call is capped.
+ */
+export declare function extractSalientMemories(input: SalienceInput, deps: SalienceDeps): Promise<readonly CaptureOutcome[]>;
+export {};
+//# sourceMappingURL=salience.d.ts.map

package/dist/salience.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"salience.d.ts","sourceRoot":"","sources":["../src/salience.ts"],"names":[],"mappings":"AAmBA,OAAO,KAAK,EAEV,cAAc,EAGd,YAAY,EACZ,aAAa,EACd,MAAM,YAAY,CAAC;AAkBpB,UAAU,eAAe;IACvB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;IAC5B,QAAQ,CAAC,KAAK,EAAE,MAAM,CAAC;IACvB,QAAQ,CAAC,IAAI,EAAE,SAAS,MAAM,EAAE,CAAC;CAClC;AAGD,eAAO,MAAM,sBAAsB,0gDAagB,CAAC;AAwEpD,wBAAgB,kBAAkB,CAAC,GAAG,EAAE,MAAM,GAAG,SAAS,eAAe,EAAE,CAe1E;AAyID,wBAAsB,sBAAsB,CAC1C,KAAK,EAAE,aAAa,EACpB,IAAI,EAAE,YAAY,GACjB,OAAO,CAAC,SAAS,cAAc,EAAE,CAAC,CA+BpC"}

package/dist/salience.js ADDED Viewed

@@ -0,0 +1,268 @@
+// Model-assisted salience capture for keiko-memory-capture.
+//
+// The regex intent extractors (intent-explicit.ts) only fire on imperative phrases ("remember
+// that X"). This module captures DURABLE, salient facts the user asserts in NATURAL conversation
+// ("I'm building a fintech app called Atlas in Rust") that the regex path misses entirely.
+//
+// It is the one extractor that needs the model, so it is async — but the boundary stays thin:
+// exactly one `callModel` await, defensive JSON parsing that can NEVER throw (malformed prose →
+// []), then the same deterministic envelope/secret/scope/policy pipeline the regex path uses.
+//
+// Product intent: the capture bar is LOW (over-capture is acceptable; a later decay/consolidation
+// pass prunes). We still apply the full secret-rejection net and clamp confidence into a band
+// that stays retrievable (>0.3 stale floor) but below the 1.0 reserved for explicit user intent.
+import { MEMORY_BODY_MAX_CHARS_DEFAULT } from "./_constants.js";
+import { buildProposal } from "./_envelopes.js";
+import { applyPolicy } from "./policy.js";
+import { inferScopeFromContext } from "./scope-inference.js";
+import { scanForSecrets } from "./secret-patterns.js";
+// Confidence band: floor 0.4 keeps salience candidates above the 0.3 stale-suppression floor so
+// they remain retrievable; ceiling 0.9 keeps them below the 1.0 reserved for explicit user intent.
+const CONFIDENCE_MIN = 0.4;
+const CONFIDENCE_MAX = 0.9;
+// Hard cap on accepted candidates per turn — over-capture is bounded.
+const MAX_CANDIDATES = 6;
+// Jaccard char-bigram similarity at/above which a candidate is treated as a near-duplicate.
+const DEDUP_THRESHOLD = 0.8;
+// Provenance string surfaced in the MemoriaViva detail view (decision 2). Salience reuses the
+// "system-default" source kind (no dedicated conversation-inferred kind exists yet), so this
+// rationale is the explainability signal that the memory was inferred, not user-instructed.
+const SALIENCE_RATIONALE = "Automatically inferred from conversation (salience capture)";
// ─── Verbatim extraction prompt ──────────────────────────────────────────────
// System prompt sent to the model for salience extraction. Paragraphs are separated by one blank
// line, matching the string literal type published for this constant in salience.d.ts; the JSON
// element shape sits directly under "Each element:" with a single line break.
export const SALIENCE_SYSTEM_PROMPT = `You extract durable memories from a chat turn so an assistant can remember the user across future conversations.

Return ONLY a JSON array (no prose, no markdown fences). Each element:
{ "body": string, "type": string, "confidence": number, "scope": string, "tags": string[] }

Capture ONLY facts the USER asserted about THEMSELVES or THEIR work that are durable and worth remembering: identity, stable preferences, project and technology facts, decisions, constraints, goals, environment, team, and recurring workflow lessons. Write each "body" as a concise, self-contained, third-person statement (e.g. "The user is building a fintech app called Atlas in Rust with PostgreSQL"). Identity statements should be canonicalised the same way every time, for example "My name is Paul." / "Hallo Keiko, ich bin Paul." -> "The user's name is Paul.".

Capture LIBERALLY — the bar is low; when in doubt, include it.

EXCLUDE: questions; one-off ephemeral task requests; anything the ASSISTANT said or suggested (the assistant message is context only, never a source of user facts); general world knowledge; and anything secret or credential-like (passwords, API keys, tokens, private keys).

"type" is one of: identity, preference, fact, decision, constraint, goal, lesson, procedural. "scope" is one of: user (personal facts/preferences), project (project-specific facts), workspace. "confidence" is 0..1. "tags" is a short list of lowercase keywords.

If there is nothing durable to capture, return [].`;
// Assembles the user-role prompt. The user's text always comes first; the assistant's text is
// appended as a clearly labelled context-only block, but only when it is defined and not blank.
function buildUserPrompt(userText, assistantText) {
    let prompt = `User said:\n${userText}`;
    const hasAssistantText = assistantText !== undefined && assistantText.trim().length > 0;
    if (hasAssistantText) {
        prompt += `\n\nAssistant said (CONTEXT ONLY — never a source of user facts):\n${assistantText}`;
    }
    return prompt;
}
// ─── Defensive JSON parsing (never throws) ───────────────────────────────────
// Removes markdown code-fence markers from model output: first every ``` together with an
// optional language tag and one optional trailing newline, then any ``` still left over.
function stripCodeFences(raw) {
    const fenceWithTag = /```[a-zA-Z]*\n?/g;
    const withoutTaggedFences = raw.replace(fenceWithTag, "");
    return withoutTaggedFences.split("```").join("");
}
// Locates the first balanced top-level JSON array in `text` and returns it as a substring, or
// null when no "[" in the text is ever closed. Candidates are tried left to right: if the
// earliest "[" has no matching "]" (for example a stray bracket in leading prose), the search
// moves on to the next "[" instead of giving up, so a well-formed array later in the text is
// still found.
function firstBalancedArray(text) {
    let start = text.indexOf("[");
    while (start !== -1) {
        const end = closingBracketIndex(text, start);
        if (end !== -1) {
            return text.slice(start, end + 1);
        }
        start = text.indexOf("[", start + 1);
    }
    return null;
}
// Walks forward from the "[" at `start` and returns the index of its matching "]", or -1 when
// the text ends first. Double-quoted string literals and backslash escapes are tracked so a
// bracket inside a string value neither opens nor closes an array.
function closingBracketIndex(text, start) {
    let depth = 0;
    let inString = false;
    let escaped = false;
    for (let i = start; i < text.length; i += 1) {
        const ch = text[i];
        if (escaped) {
            // The previous character was a backslash: this one is consumed literally.
            escaped = false;
            continue;
        }
        if (ch === "\\") {
            escaped = true;
            continue;
        }
        if (ch === '"') {
            inString = !inString;
            continue;
        }
        if (inString) {
            continue;
        }
        if (ch === "[") {
            depth += 1;
        }
        else if (ch === "]") {
            depth -= 1;
            if (depth === 0) {
                return i;
            }
        }
    }
    return -1;
}
// Shape check for one parsed array element: it must be a non-null object whose `body`, `type`
// and `scope` are strings, whose `confidence` is a number, and whose `tags` is an array
// containing only strings.
function isRawSalienceItem(value) {
    if (value === null || typeof value !== "object") {
        return false;
    }
    const { body, type, confidence, scope, tags } = value;
    const scalarsOk = typeof body === "string" &&
        typeof type === "string" &&
        typeof confidence === "number" &&
        typeof scope === "string";
    if (!scalarsOk || !Array.isArray(tags)) {
        return false;
    }
    return tags.every((tag) => typeof tag === "string");
}
// Converts raw model output into validated raw items. Fences are stripped, the first balanced
// array is extracted and handed to JSON.parse, and only elements passing `isRawSalienceItem`
// are kept. Every failure mode — no array found, invalid JSON, a non-array result — produces an
// empty list so that capture never throws into the chat path.
export function parseSalienceItems(raw) {
    const candidate = firstBalancedArray(stripCodeFences(raw));
    if (candidate === null) {
        return [];
    }
    let decoded;
    try {
        decoded = JSON.parse(candidate);
    }
    catch {
        return [];
    }
    return Array.isArray(decoded) ? decoded.filter(isRawSalienceItem) : [];
}
// ─── Loose-label → contract mapping ──────────────────────────────────────────
// Lookup table from the model's free-form "type" label to the memory type used on proposals.
const TYPE_MAP = {
    identity: "semantic-fact",
    fact: "semantic-fact",
    constraint: "semantic-fact",
    goal: "semantic-fact",
    environment: "semantic-fact",
    team: "semantic-fact",
    preference: "preference",
    decision: "decision",
    lesson: "procedural",
    procedural: "procedural",
    workflow: "procedural",
};
// Maps a loose, model-supplied type label (case- and whitespace-insensitive) to a memory type,
// falling back to "semantic-fact" for anything not listed in TYPE_MAP. Only OWN keys of the
// table count: the label is untrusted model output, and a plain property read would resolve
// inherited names such as "constructor", "toString" or "__proto__" to non-string values
// instead of taking the fallback.
function mapType(loose) {
    const label = loose.trim().toLowerCase();
    return Object.prototype.hasOwnProperty.call(TYPE_MAP, label) ? TYPE_MAP[label] : "semantic-fact";
}
// Maps the model's loose scope label to a scope kind. After trimming and lower-casing,
// "project" and "workspace" pass through; every other label becomes "user".
function mapScopeKind(loose) {
    const label = loose.trim().toLowerCase();
    return ["project", "workspace"].includes(label) ? label : "user";
}
// Forces a model-reported confidence into [CONFIDENCE_MIN, CONFIDENCE_MAX]. NaN is handled
// explicitly (it would otherwise propagate through Math.min/Math.max) and maps to the minimum.
function clampConfidence(value) {
    return Number.isNaN(value)
        ? CONFIDENCE_MIN
        : Math.max(CONFIDENCE_MIN, Math.min(CONFIDENCE_MAX, value));
}
// ─── Dedup (pure, char-bigram Jaccard) ───────────────────────────────────────
// Canonicalises a memory body for duplicate detection: Unicode-normalised (NFKC), lower-cased,
// with every run of characters that are not letters, combining marks or digits collapsed to a
// single space, then trimmed. The character classes are Unicode-aware on purpose: an ASCII-only
// filter reduces bodies written in non-Latin scripts to the empty string, and since two empty
// bigram sets compare as identical, unrelated memories would be discarded as duplicates.
// ASCII input normalises exactly as it would under an a-z/0-9 filter.
function normalizeForDedup(body) {
    return body
        .normalize("NFKC")
        .toLowerCase()
        .replace(/[^\p{L}\p{M}\p{N}]+/gu, " ")
        .trim();
}
// Collects the distinct two-character windows of `normalized` into a Set. Strings shorter than
// two characters produce an empty set.
function charBigrams(normalized) {
    const grams = new Set();
    let index = 0;
    while (index + 1 < normalized.length) {
        grams.add(normalized.substring(index, index + 2));
        index += 1;
    }
    return grams;
}
// Jaccard similarity |a ∩ b| / |a ∪ b| of two bigram sets. Two empty sets are defined as
// identical (1); the zero-union guard is kept as a safety net against division by zero.
function jaccard(a, b) {
    if (a.size === 0 && b.size === 0) {
        return 1;
    }
    const shared = [...a].filter((gram) => b.has(gram)).length;
    const combined = a.size + b.size - shared;
    if (combined === 0) {
        return 0;
    }
    return shared / combined;
}
// True when `candidate` reaches DEDUP_THRESHOLD similarity against at least one bigram set in
// `seen`; stops at the first such set.
function isNearDuplicate(candidate, seen) {
    return seen.some((existing) => jaccard(candidate, existing) >= DEDUP_THRESHOLD);
}
// ─── Candidate construction ──────────────────────────────────────────────────
// Builds the context used for proposals: a shallow copy of the caller's context in which the
// clock reading (`nowMs`, taken from `deps.now()` once) and both id factories are replaced by
// the ones supplied in `deps`, so the caller of the extractor controls time and ids.
function effectiveContext(input, deps) {
    const merged = { ...input.context };
    merged.nowMs = deps.now();
    merged.newMemoryId = deps.newMemoryId;
    merged.newProposalId = deps.newProposalId;
    return merged;
}
// Converts one validated raw item into a `candidate` outcome, or returns null when the item
// must be dropped. Drop reasons, checked in this order: body empty after trimming or longer
// than the limit (`policy.maxBodyChars`, else the package default); secret scan hit; scope not
// resolvable for the item's mapped scope kind. Surviving items get a proposal with sourceKind
// "system-default", the salience rationale, a clamped confidence, and a copy of the item's tags.
function buildCandidate(item, context, policy) {
    const body = item.body.trim();
    const limit = policy.maxBodyChars ?? MEMORY_BODY_MAX_CHARS_DEFAULT;
    if (body.length === 0 || body.length > limit) {
        return null;
    }
    const finding = scanForSecrets(body, policy.customerIdentifierMatchers ?? []);
    if (finding !== null) {
        return null;
    }
    const globalHint = policy.allowGlobalScope === undefined
        ? {}
        : { allowGlobalScope: policy.allowGlobalScope };
    const scope = inferScopeFromContext(context, { scopeKind: mapScopeKind(item.scope), ...globalHint });
    if (scope === null) {
        return null;
    }
    // Forward the default sensitivity only when the caller actually configured one.
    const policyInput = policy.defaultSensitivity === undefined
        ? {}
        : { defaultSensitivity: policy.defaultSensitivity };
    const { sensitivity, requiresApproval } = applyPolicy(body, policyInput);
    const draft = {
        context,
        scope,
        body,
        type: mapType(item.type),
        sensitivity,
        sourceKind: "system-default",
        captureRationale: SALIENCE_RATIONALE,
    };
    const proposal = buildProposal(draft, clampConfidence(item.confidence));
    return {
        kind: "candidate",
        proposal: { ...proposal, tags: [...item.tags] },
        requiresApproval,
    };
}
// ─── Public entry point ──────────────────────────────────────────────────────
// Model-assisted capture for one chat turn. Blank user text short-circuits to [] without a
// model call. Otherwise the model is called once, its output is parsed defensively, and each
// item is first built into a candidate and then checked for near-duplication against
// `input.existingBodies` and the items already accepted in this call. Processing stops once
// MAX_CANDIDATES have been accepted.
// NOTE(review): a rejected `deps.callModel` promise is not caught here and propagates to the
// caller — confirm that is the intended contract.
export async function extractSalientMemories(input, deps) {
    if (input.userText.trim().length === 0) {
        return [];
    }
    const userPrompt = buildUserPrompt(input.userText, input.assistantText);
    const raw = await deps.callModel(SALIENCE_SYSTEM_PROMPT, userPrompt);
    const items = parseSalienceItems(raw);
    const context = effectiveContext(input, deps);
    const policy = input.policy ?? {};
    // Bigram sets of everything a new item must differ from; grows as items are accepted.
    const seen = input.existingBodies.map((body) => charBigrams(normalizeForDedup(body)));
    const accepted = [];
    for (const item of items) {
        if (accepted.length >= MAX_CANDIDATES) {
            break;
        }
        const candidate = buildCandidate(item, context, policy);
        if (candidate !== null) {
            const bigrams = charBigrams(normalizeForDedup(item.body));
            if (!isNearDuplicate(bigrams, seen)) {
                seen.push(bigrams);
                accepted.push(candidate);
            }
        }
    }
    return accepted;
}

package/dist/scope-inference.d.ts ADDED Viewed

@@ -0,0 +1,9 @@
+import type { MemoryScope, MemoryScopeKind } from "@oscharko-dev/keiko-contracts/memory";
+import type { CaptureContext } from "./types.js";
+/** Hints passed by the extractors to `inferScopeFromContext`. */
+interface ScopeInferenceOptions {
+    /** Requested scope kind; callers omit it when they have no preference. */
+    readonly scopeKind?: MemoryScopeKind;
+    /** Forwarded from the capture policy when set. NOTE(review): exact effect is not visible here. */
+    readonly allowGlobalScope?: boolean;
+}
+/**
+ * Derives the memory scope for a capture from its context and the given hints.
+ * Returns `null` when no scope can be resolved; the extractors turn that into a
+ * `scope-not-resolvable` rejection (or drop the item, in salience capture).
+ */
+export declare function inferScopeFromContext(context: CaptureContext, options: ScopeInferenceOptions): MemoryScope | null;
+export {};
+//# sourceMappingURL=scope-inference.d.ts.map

package/dist/scope-inference.d.ts.map ADDED Viewed

@@ -0,0 +1 @@

+ {"version":3,"file":"scope-inference.d.ts","sourceRoot":"","sources":["../src/scope-inference.ts"],"names":[],"mappings":"AAMA,OAAO,KAAK,EAAE,WAAW,EAAE,eAAe,EAAE,MAAM,sCAAsC,CAAC;AAEzF,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,YAAY,CAAC;AAEjD,UAAU,qBAAqB;IAC7B,QAAQ,CAAC,SAAS,CAAC,EAAE,eAAe,CAAC;IACrC,QAAQ,CAAC,gBAAgB,CAAC,EAAE,OAAO,CAAC;CACrC;AA4CD,wBAAgB,qBAAqB,CACnC,OAAO,EAAE,cAAc,EACvB,OAAO,EAAE,qBAAqB,GAC7B,WAAW,GAAG,IAAI,CAGpB"}