npm - persnally - Versions diffs - 2.1.0 → 2.3.0 - Mend

persnally 2.1.0 → 2.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (17)

package/build/src/cli.js +25 -2
package/build/src/consolidate.d.ts +1 -0
package/build/src/consolidate.js +5 -1
package/build/src/daemon.js +11 -1
package/build/src/dashboard.html +603 -137
package/build/src/events.d.ts +24 -0
package/build/src/events.js +10 -0
package/build/src/importers/extract.js +12 -1
package/build/src/mcp/index.js +60 -33
package/build/src/prose.d.ts +10 -0
package/build/src/prose.js +34 -0
package/build/src/setup.js +6 -1
package/build/src/store.d.ts +23 -0
package/build/src/store.js +77 -1
package/build/src/stylometry.d.ts +21 -0
package/build/src/stylometry.js +124 -0
package/package.json +1 -1

package/build/src/events.d.ts CHANGED

@@ -59,6 +59,29 @@ export declare const PAYLOAD_SCHEMAS: {
         proficiency: z.ZodNumber;
         basis: z.ZodString;
     }, z.core.$strip>;
+    readonly "signal.style": z.ZodObject<{
+        dimension: z.ZodEnum<{
+            format: "format";
+            voice: "voice";
+            convention: "convention";
+            emphasis: "emphasis";
+            workflow: "workflow";
+        }>;
+        pattern: z.ZodString;
+        polarity: z.ZodEnum<{
+            does: "does";
+            avoids: "avoids";
+            prefers: "prefers";
+            insists: "insists";
+        }>;
+        confidence: z.ZodNumber;
+        evidence: z.ZodString;
+        basis: z.ZodEnum<{
+            observed: "observed";
+            stylometry: "stylometry";
+            correction: "correction";
+        }>;
+    }, z.core.$strip>;
     readonly "context.read": z.ZodObject<{
         scope: z.ZodString;
         client_purpose: z.ZodString;
@@ -134,6 +157,7 @@ export declare const eventSchema: z.ZodObject<{
         "signal.topic": "signal.topic";
         "signal.assertion": "signal.assertion";
         "signal.skill": "signal.skill";
+        "signal.style": "signal.style";
         "context.read": "context.read";
         "agent.question": "agent.question";
         "agent.answer": "agent.answer";

package/build/src/events.js CHANGED

@@ -33,6 +33,16 @@ export const PAYLOAD_SCHEMAS = {
         proficiency: z.number().min(0).max(1),
         basis: z.string(),
     }),
+    // How the user writes/works — the prescriptive layer (docs/CONTEXT_DEPTH.md).
+    // Structured so it dedupes by `pattern` and consolidates into stable constants.
+    "signal.style": z.object({
+        dimension: z.enum(["voice", "convention", "emphasis", "format", "workflow"]),
+        pattern: z.string().min(1),
+        polarity: z.enum(["does", "avoids", "prefers", "insists"]),
+        confidence: z.number().min(0).max(1),
+        evidence: z.string(),
+        basis: z.enum(["observed", "stylometry", "correction"]),
+    }),
     "context.read": z.object({
         scope: z.string(),
         client_purpose: z.string(),

package/build/src/importers/extract.js CHANGED

@@ -5,16 +5,23 @@
 import { z } from "zod";
 import { newEvent, safeIso, uuidv7, PAYLOAD_SCHEMAS } from "../events.js";
 import { anthropicExtract, DEFAULT_EXTRACT_MODEL } from "../llm.js";
+import { proseLines, stripNoise } from "../prose.js";
+import { analyzeVoice } from "../stylometry.js";
 const MAX_CONVO_CHARS = 30_000;
 const topicsExtraction = z.object({ topics: z.array(PAYLOAD_SCHEMAS["signal.topic"]) });
 const assertionsExtraction = z.object({ assertions: z.array(PAYLOAD_SCHEMAS["signal.assertion"]) });
 export async function extractEvents(parsed, opts, extract = anthropicExtract, model = DEFAULT_EXTRACT_MODEL) {
     const batch = uuidv7();
     const events = [];
+    const voiceCorpus = []; // clean prose for the deterministic voice fingerprint
     for (const convo of parsed.conversations) {
         if (!convo.userMessages.length)
             continue;
-        const text = convo.userMessages.join("\n").slice(0, MAX_CONVO_CHARS);
+        const joined = convo.userMessages.join("\n");
+        voiceCorpus.push(...proseLines(joined));
+        const text = stripNoise(joined).slice(0, MAX_CONVO_CHARS); // strip pasted paths/URLs/logs before the LLM sees it
+        if (!text)
+            continue;
         const result = await extract({
             model,
             instruction: "Extract 1-5 topic signals from this conversation's user messages. Weight = centrality, depth = engagement level, sentiment = user's attitude toward the topic. Capture decisions and rejected options as their own signals.",
@@ -42,6 +49,10 @@ export async function extractEvents(parsed, opts, extract = anthropicExtract, mo
             events.push(newEvent("signal.assertion", opts.source, a, { kind: "import", batch, file: opts.file }));
         }
     }
+    // Deterministic voice fingerprint over the user's own prose — no LLM, no tokens.
+    for (const s of analyzeVoice(voiceCorpus).signals) {
+        events.push(newEvent("signal.style", opts.source, s, { kind: "import", batch, file: opts.file }));
+    }
     const span = parsed.conversations.map((c) => c.created_at).sort();
     events.push(newEvent("system.import", "system", {
         importer: opts.importer,

package/build/src/mcp/index.js CHANGED

@@ -49,38 +49,51 @@ async function recordRead(scope, purpose, items) {
     }
 }
 // ── persnally_track — write path ────────────────────────────
-server.tool("persnally_track", `Track topics and interests from the current conversation to build the user's personal context.
+const TOPIC_SCHEMA = z.object({
+    topic: z.string().describe("The topic, decision, or preference (e.g. 'Rust async programming', 'chose SQLite over Postgres')"),
+    weight: z.number().min(0).max(1),
+    intent: z.enum(["learning", "building", "researching", "deciding", "discussing", "debugging"]),
+    sentiment: z.enum(["positive", "negative", "neutral"]),
+    depth: z.enum(["mention", "moderate", "deep"]),
+    category: z.enum(["technology", "business", "finance", "career", "health", "science", "creative", "education", "lifestyle", "news", "other"]),
+    entities: z.array(z.string()),
+});
+const STYLE_SCHEMA = z.object({
+    dimension: z.enum(["voice", "convention", "emphasis", "format", "workflow"])
+        .describe("voice=tone/phrasing; convention=tools/rules; emphasis=what they insist on; format=structure; workflow=how they work"),
+    pattern: z.string().min(1).describe("a short, reusable instruction — e.g. 'prefers pnpm over npm', 'wants the falsification first', 'terse, no filler'"),
+    polarity: z.enum(["does", "avoids", "prefers", "insists"]),
+    confidence: z.number().min(0).max(1).default(0.6),
+    evidence: z.string().default("").describe("a brief quote or why you believe it"),
+});
+server.tool("persnally_track", `Track what builds the user's lasting context. Two kinds of signal, both optional — send whichever this conversation produced.
-Call this when the user discusses topics they care about — and when they make a decision, accept or reject an option, or express a clear preference, capture that as its own signal rather than folding it into a broader topic.
+TOPICS — what they're engaged with (interests, decisions, accepted/rejected options).
+- 1-5 per conversation; weight = centrality (0.1 brief … 1.0 main focus); depth = mention|moderate|deep; sentiment 'negative' deprioritizes; entities are specific names ("Next.js", not "web framework").
-GUIDELINES:
-- Extract 1-5 signals per conversation, focused on what the user is ACTIVELY engaged with
-- Weight = how central to the conversation (0.1 brief, 1.0 main focus)
-- Depth: "mention" | "moderate" | "deep" (extensive discussion or problem-solving)
-- Sentiment: "negative" means frustration or dislike (deprioritizes, never boosts)
-- Entities are specific names: "Next.js" not "web framework"
+STYLE — HOW they write and work, so every AI can answer like them. High value, but easy to over-send: record only a CLEAR, REPEATED tell, never a one-off, at most 1-3 per conversation. Examples:
+- voice: "terse, no filler" · convention: "prefers pnpm over npm", "no default exports" · emphasis: "wants the falsification first" · format: "answers in bullet points" · workflow: "kills ideas fast".
+- Skip anything generic or already obvious. When unsure, don't.
-The user opted in. Only structured signals are stored, locally, never raw messages.`, {
-    topics: z.array(z.object({
-        topic: z.string().describe("The topic, decision, or preference (e.g. 'Rust async programming', 'chose SQLite over Postgres')"),
-        weight: z.number().min(0).max(1),
-        intent: z.enum(["learning", "building", "researching", "deciding", "discussing", "debugging"]),
-        sentiment: z.enum(["positive", "negative", "neutral"]),
-        depth: z.enum(["mention", "moderate", "deep"]),
-        category: z.enum(["technology", "business", "finance", "career", "health", "science", "creative", "education", "lifestyle", "news", "other"]),
-        entities: z.array(z.string()),
-    })).min(1),
-}, async ({ topics }) => guarded(async () => {
-    logEvent("tool_call", { tool: "persnally_track", topics: topics.length });
+The user opted in. Only these structured signals are stored, locally, never raw messages.`, {
+    topics: z.array(TOPIC_SCHEMA).optional(),
+    style: z.array(STYLE_SCHEMA).optional(),
+}, async ({ topics, style }) => guarded(async () => {
+    logEvent("tool_call", { tool: "persnally_track", topics: topics?.length ?? 0, style: style?.length ?? 0 });
     const client = clientSlug();
-    const events = topics.map((t) => ({
-        type: "signal.topic",
-        source: `mcp:${client}`,
-        payload: t,
-        provenance: { kind: "mcp", client },
-    }));
+    const events = [
+        ...(topics ?? []).map((t) => ({ type: "signal.topic", source: `mcp:${client}`, payload: t, provenance: { kind: "mcp", client } })),
+        ...(style ?? []).map((s) => ({ type: "signal.style", source: `mcp:${client}`, payload: { ...s, basis: "observed" }, provenance: { kind: "mcp", client } })),
+    ];
+    if (!events.length)
+        return text("Nothing to track — pass topics and/or style signals.");
     await daemonPost("/events", events);
-    return text(`Recorded ${topics.length} signal(s): ${topics.map((t) => t.topic).join(", ")}.`);
+    const parts = [];
+    if (topics?.length)
+        parts.push(`${topics.length} topic(s): ${topics.map((t) => t.topic).join(", ")}`);
+    if (style?.length)
+        parts.push(`${style.length} style signal(s)`);
+    return text(`Recorded ${parts.join(" · ")}.`);
 }));
 // ── persnally_context — read path (the Phase 2 core) ────────
 server.tool("persnally_context", `Get the user's personal context: who they are, what they're working on, and their current interests.
@@ -91,11 +104,12 @@ Call this at the START of a conversation (or when personalization would improve
 }, async ({ detail, purpose }) => guarded(async () => {
     logEvent("tool_call", { tool: "persnally_context", detail });
     const client = encodeURIComponent(getClient());
-    const [profile, topics] = await Promise.all([
+    const [profile, topics, voice] = await Promise.all([
         daemonGet(`/profile?client=${client}`),
         daemonGet(`/topics?limit=${detail === "full" ? 25 : 10}&client=${client}`),
+        daemonGet("/voice"),
     ]);
-    if (!profile && !topics?.length) {
+    if (!profile && !topics?.length && !voice?.pack) {
         return text("No context yet — the user hasn't imported data or tracked any signals.");
     }
     let out = "";
@@ -106,6 +120,11 @@ Call this at the START of a conversation (or when personalization would improve
         items += sections.length;
         out += sections.map((s) => `## ${s.title}\n${s.body}`).join("\n\n");
     }
+    // The prescriptive layer: how to write/answer so it fits this user, not a generic one.
+    if (voice?.pack) {
+        out += `${out ? "\n\n" : ""}# How to write for this user\n${voice.pack}`;
+        items += voice.items?.length ?? 0;
+    }
     if (topics?.length) {
         out += `\n\n# Current interests (decay-weighted)\n`;
         out += topics.map((t) => `- ${t.topic} (${t.category}, ${t.dominant_intent}, weight ${t.weight.toFixed(2)})`).join("\n");
@@ -130,17 +149,25 @@ server.tool("persnally_interests", `Show the user their own tracked interest pro
     return text(out);
 }));
 // ── persnally_forget — privacy control ──────────────────────
-server.tool("persnally_forget", `Hard-delete a topic (and everything derived from it) from the user's context, or wipe all data. Privacy control — always honor it.`, {
-    topic: z.string().optional().describe("Topic to remove. Omit with clear_all=true to wipe everything."),
+server.tool("persnally_forget", `Hard-delete a topic or a voice/style pattern (and everything derived from it) from the user's context, or wipe all data. Privacy control — always honor it. A forgotten style pattern stays gone permanently, even if later conversations would otherwise re-observe it.`, {
+    topic: z.string().optional().describe("Topic to remove."),
+    style: z.object({
+        dimension: z.enum(["voice", "convention", "emphasis", "format", "workflow"]),
+        pattern: z.string(),
+    }).optional().describe("A 'How you write' pattern to remove, e.g. {dimension: 'emphasis', pattern: 'be 100% sure'}."),
     clear_all: z.boolean().optional().default(false),
-}, async ({ topic, clear_all }) => guarded(async () => {
+}, async ({ topic, style, clear_all }) => guarded(async () => {
     logEvent("tool_call", { tool: "persnally_forget", clear_all });
     if (clear_all) {
         await daemonDelete("/events?confirm=all");
         return text("All Persnally data deleted. The store is empty.");
     }
+    if (style) {
+        const r = await daemonDelete(`/voice/${encodeURIComponent(style.dimension)}/${encodeURIComponent(style.pattern)}`);
+        return text(r.deleted ? `Forgot "${style.pattern}" — it won't be re-learned.` : `"${style.pattern}" not found.`);
+    }
     if (!topic)
-        return text("Name a topic to forget, or set clear_all.");
+        return text("Name a topic or a style pattern to forget, or set clear_all.");
     const r = await daemonDelete(`/topics/${encodeURIComponent(topic)}`);
     return text(r.deleted ? `Deleted ${r.deleted} event(s) for "${topic}", including derived data.` : `"${topic}" not found.`);
 }));

package/build/src/prose.d.ts ADDED

@@ -0,0 +1,10 @@
+/**
+ * Corpus hygiene. Imported prompts are polluted with pasted data (file paths,
+ * URLs, JSON/logs) and injected blocks (task notifications, reminders, command
+ * palettes, tool output). Unfiltered, that noise swamps both topic extraction
+ * and the voice fingerprint. See docs/CONTEXT_DEPTH.md.
+ */
+/** Remove injected blocks, fenced code, URLs, and filesystem paths. Keeps prose intact. */
+export declare function stripNoise(text: string): string;
+/** Strict: only the prose lines a human actually wrote — for stylometry. */
+export declare function proseLines(text: string): string[];

package/build/src/prose.js ADDED

@@ -0,0 +1,34 @@
+/**
+ * Corpus hygiene. Imported prompts are polluted with pasted data (file paths,
+ * URLs, JSON/logs) and injected blocks (task notifications, reminders, command
+ * palettes, tool output). Unfiltered, that noise swamps both topic extraction
+ * and the voice fingerprint. See docs/CONTEXT_DEPTH.md.
+ */
+// A line with at least one of these reads as a sentence, not pasted data.
+const FUNCTION_WORD = /\b(the|a|an|i|to|and|is|it|you|we|that|this|of|for|in|on|do|are|be|can|should|need|want|make|how|what|why|let|so|but|not|just|with|like|now|also|when|if|because|about)\b/;
+/** Remove injected blocks, fenced code, URLs, and filesystem paths. Keeps prose intact. */
+export function stripNoise(text) {
+    return text
+        .replace(/```[\s\S]*?```/g, " ")
+        .replace(/<(?:task-notification|system-reminder|local-command[^>]*|command-[^>]*)>[\s\S]*?<\/[^>]+>/gi, " ")
+        .replace(/<\/?[a-z][^>]*>/gi, " ")
+        .replace(/https?:\/\/\S+/g, " ")
+        .replace(/(?:[~\w.\-]+)?(?:\/[\w.\-]+){2,}\/?/g, " ") // /a/b style paths
+        .replace(/[ \t]{2,}/g, " ")
+        .replace(/\n{3,}/g, "\n\n")
+        .trim();
+}
+/** Strict: only the prose lines a human actually wrote — for stylometry. */
+export function proseLines(text) {
+    return stripNoise(text)
+        .split("\n")
+        .map((l) => l.trim())
+        .filter((ln) => {
+        if (ln.split(/\s+/).length < 2)
+            return false;
+        const letters = (ln.match(/[a-zA-Z]/g) || []).length;
+        if (!ln.length || letters / ln.length < 0.6)
+            return false; // json/logs/ids
+        return FUNCTION_WORD.test(" " + ln.toLowerCase() + " ");
+    });
+}

package/build/src/setup.js CHANGED

@@ -31,7 +31,12 @@ function zipHasConversations(zipPath) {
         return execFileSync("unzip", ["-l", zipPath], { encoding: "utf-8", stdio: ["ignore", "pipe", "ignore"] })
             .includes("conversations.json");
     }
-    catch {
+    catch (e) {
+        // Only reached on a genuine read failure (unzip missing, corrupt archive,
+        // permission denied) — an ordinary non-matching zip never throws here, so
+        // this can't spam on unrelated Downloads clutter. Surface it: a real export
+        // failing silently is the worst onboarding failure mode there is.
+        console.error(`persnally: couldn't read ${zipPath} (${e instanceof Error ? e.message : e}) — skipping`);
         return false;
     }
 }

package/build/src/store.d.ts CHANGED

@@ -3,6 +3,7 @@
  * Single source of truth per docs/EVENT_SCHEMA.md; views can always be re-derived.
  */
 import { type PersnallyEvent } from "./events.js";
+import { type StyleSignal } from "./stylometry.js";
 export declare const DEFAULT_DB_PATH: string;
 export interface QueryOpts {
     type?: string;
@@ -53,6 +54,28 @@ export declare class EventStore {
     rebuild(now?: number): void;
     saveProfile(p: StoredProfile): void;
     getProfile(): StoredProfile | null;
+    /** Logical key for one style pattern — stable across re-imports/re-observations. */
+    private styleKey;
+    /** Patterns the user has explicitly forgotten — a delete correction tombstones the key permanently. */
+    private forgottenStyleKeys;
+    /** The voice/convention profile — style signals deduped by pattern (newest wins), richest first, forgotten patterns excluded. */
+    voice(): {
+        pack: string;
+        items: StyleSignal[];
+    };
+    /**
+     * Hard-deletes a style pattern's events and writes a delete correction so it
+     * stays gone even if stylometry or live capture re-derives it later — the
+     * "deletable for real" promise extended to the voice layer.
+     */
+    forgetStyle(dimension: string, pattern: string): number;
+    /** Drops style signals of one basis so a deterministic re-run replaces them (live `observed`/`correction` signals are kept). */
+    clearStyleByBasis(basis: string): number;
+    /**
+     * Consolidation distill: bounds the stored style backlog so live capture
+     * never grows unbounded. Keeps the richest signal per pattern, capped overall.
+     */
+    pruneStyle(maxTotal?: number): number;
     /** Hard-deletes matching topic events plus derived events referencing them, then rebuilds. */
     forgetTopic(topic: string): number;
     /** Removes every event from one import batch — a bad import is fully reversible. */

package/build/src/store.js CHANGED

@@ -6,8 +6,9 @@ import Database from "better-sqlite3";
 import { mkdirSync } from "node:fs";
 import { dirname, join } from "node:path";
 import { topicWeight } from "./decay.js";
-import { normalizeTopic, validateEvent } from "./events.js";
+import { newEvent, normalizeTopic, validateEvent } from "./events.js";
 import { DATA_DIR } from "./paths.js";
+import { assemblePack } from "./stylometry.js";
 const VIEW_SCHEMA_VERSION = 2;
 export const DEFAULT_DB_PATH = join(DATA_DIR, "persnally.db");
 export class EventStore {
@@ -195,6 +196,81 @@ export class EventStore {
         const row = this.db.prepare("SELECT * FROM view_profile WHERE id = 1").get();
         return row ? { ...row, sections: JSON.parse(row.sections) } : null;
     }
+    /** Logical key for one style pattern — stable across re-imports/re-observations. */
+    styleKey(dimension, pattern) {
+        return `style:${dimension}|${pattern.toLowerCase()}`;
+    }
+    /** Patterns the user has explicitly forgotten — a delete correction tombstones the key permanently. */
+    forgottenStyleKeys() {
+        const forgotten = new Set();
+        for (const e of this.query({ type: "user.correction", limit: 1_000_000 })) {
+            const p = e.payload;
+            if (p.action === "delete" && p.target_id.startsWith("style:"))
+                forgotten.add(p.target_id);
+        }
+        return forgotten;
+    }
+    /** The voice/convention profile — style signals deduped by pattern (newest wins), richest first, forgotten patterns excluded. */
+    voice() {
+        const forgotten = this.forgottenStyleKeys();
+        const byPattern = new Map();
+        // query() returns ts DESC, so the first occurrence of a pattern is the most recent.
+        for (const e of this.query({ type: "signal.style", limit: 1_000_000 })) {
+            const p = e.payload;
+            const key = this.styleKey(p.dimension, p.pattern);
+            if (forgotten.has(key) || byPattern.has(key))
+                continue;
+            byPattern.set(key, p);
+        }
+        // Cap the served set: live `observed` signals accrue over time, so bound it
+        // to the richest few (consolidation prunes the stored backlog separately).
+        const items = [...byPattern.values()].sort((a, b) => b.confidence - a.confidence).slice(0, 28);
+        return { pack: assemblePack(items), items };
+    }
+    /**
+     * Hard-deletes a style pattern's events and writes a delete correction so it
+     * stays gone even if stylometry or live capture re-derives it later — the
+     * "deletable for real" promise extended to the voice layer.
+     */
+    forgetStyle(dimension, pattern) {
+        const key = this.styleKey(dimension, pattern);
+        const candidates = this.query({ type: "signal.style", limit: 1_000_000 }).filter((e) => this.styleKey(e.payload.dimension, e.payload.pattern) === key);
+        const del = this.db.prepare("DELETE FROM events WHERE id = ?");
+        const run = this.db.transaction((toDelete) => { for (const id of toDelete)
+            del.run(id); });
+        run(candidates.map((e) => e.id));
+        this.append([newEvent("user.correction", "dashboard", { target_id: key, action: "delete", reason: "" }, { kind: "local", surface: "dashboard" })]);
+        return candidates.length;
+    }
+    /** Drops style signals of one basis so a deterministic re-run replaces them (live `observed`/`correction` signals are kept). */
+    clearStyleByBasis(basis) {
+        return this.db
+            .prepare("DELETE FROM events WHERE type = 'signal.style' AND json_extract(payload, '$.basis') = ?")
+            .run(basis).changes;
+    }
+    /**
+     * Consolidation distill: bounds the stored style backlog so live capture
+     * never grows unbounded. Keeps the richest signal per pattern, capped overall.
+     */
+    pruneStyle(maxTotal = 80) {
+        const byPattern = new Map();
+        for (const e of this.query({ type: "signal.style", limit: 1_000_000 })) {
+            const p = e.payload;
+            const key = this.styleKey(p.dimension, p.pattern);
+            const existing = byPattern.get(key);
+            if (!existing || existing.payload.confidence < p.confidence)
+                byPattern.set(key, e);
+        }
+        const ranked = [...byPattern.entries()].sort((a, b) => b[1].payload.confidence - a[1].payload.confidence);
+        const keepIds = new Set(ranked.slice(0, maxTotal).map(([, e]) => e.id));
+        const all = this.query({ type: "signal.style", limit: 1_000_000 });
+        const toDelete = all.filter((e) => !keepIds.has(e.id)).map((e) => e.id); // drop weaker duplicates + overflow
+        const del = this.db.prepare("DELETE FROM events WHERE id = ?");
+        const run = this.db.transaction((ids) => { for (const id of ids)
+            del.run(id); });
+        run(toDelete);
+        return toDelete.length;
+    }
     /** Hard-deletes matching topic events plus derived events referencing them, then rebuilds. */
     forgetTopic(topic) {
         const key = normalizeTopic(topic);

package/build/src/stylometry.d.ts ADDED

@@ -0,0 +1,21 @@
+/**
+ * Deterministic voice fingerprint — no LLM, no tokens, nothing leaves the machine.
+ * Turns the user's own prose (already noise-filtered via prose.ts) into structured
+ * signal.style payloads + a prescriptive "voice" pack. See docs/CONTEXT_DEPTH.md.
+ */
+import { z } from "zod";
+import { PAYLOAD_SCHEMAS } from "./events.js";
+export type StyleSignal = z.infer<(typeof PAYLOAD_SCHEMAS)["signal.style"]>;
+export interface VoiceProfile {
+    signals: StyleSignal[];
+    words: {
+        word: string;
+        count: number;
+    }[];
+    pack: string;
+    prompts: number;
+}
+/** Compute a voice profile from prose messages (each may be multi-line). */
+export declare function analyzeVoice(messages: string[]): VoiceProfile;
+/** Build the system-prompt-ready "voice" line from style signals (shared by import + serving). */
+export declare function assemblePack(signals: StyleSignal[]): string;

package/build/src/stylometry.js ADDED

@@ -0,0 +1,124 @@
+/**
+ * Deterministic voice fingerprint — no LLM, no tokens, nothing leaves the machine.
+ * Turns the user's own prose (already noise-filtered via prose.ts) into structured
+ * signal.style payloads + a prescriptive "voice" pack. See docs/CONTEXT_DEPTH.md.
+ */
+const STOP = new Set(("a an the and or but if then so of to in on for with at by from as is are was were be been being this that these those it its i you he she we they me my your our their them us do does did done have has had having will would can could should may might must not no yes what which who when where why how all any both each few more most other some such only own same than too very just about into over after before above below up down out off again once here there im ive youre were theyre lets")
+    .split(/\s+/));
+const DIRECTIVE = new Set("make fix add remove create give check use keep build write update ensure confirm let lets do run show change implement refactor delete set move find get take generate review test verify explain tell help put start stop send pull push merge commit"
+    .split(" "));
+const HEDGE = ["maybe", "i think", "probably", "perhaps", "kind of", "sort of", "i guess", "might be", "not sure", "i feel"];
+const EMOJI = /\p{Extended_Pictographic}/gu;
+const tokenize = (s) => s.toLowerCase().match(/[a-z0-9][a-z0-9']*/g) || [];
+const median = (xs) => {
+    if (!xs.length)
+        return 0;
+    const s = [...xs].sort((a, b) => a - b), m = s.length >> 1;
+    return s.length % 2 ? s[m] : (s[m - 1] + s[m]) / 2;
+};
+const allStop = (g) => g.split(" ").every((w) => STOP.has(w) || /^\d+$/.test(w));
+/** Compute a voice profile from prose messages (each may be multi-line). */
+export function analyzeVoice(messages) {
+    if (!messages.length)
+        return { signals: [], words: [], pack: "", prompts: 0 };
+    const uni = new Map(), tri = new Map(), quad = new Map();
+    const sentLens = [];
+    const wordSet = new Set();
+    let total = 0, sent = 0, q = 0, dir = 0, hedge = 0, emoji = 0, lowerI = 0, upperI = 0, please = 0, bulletLines = 0;
+    for (const msg of messages) {
+        emoji += (msg.match(EMOJI) || []).length;
+        lowerI += (msg.match(/(?:^|\s)i(?:'|\s|$)/g) || []).length;
+        upperI += (msg.match(/(?:^|\s)I(?:'|\s|$)/g) || []).length;
+        for (const ln of msg.split("\n"))
+            if (/^\s*[-*•]\s/.test(ln))
+                bulletLines++;
+        const words = tokenize(msg);
+        total += words.length;
+        words.forEach((w) => {
+            wordSet.add(w);
+            if (!STOP.has(w) && w.length >= 4 && !/^\d+$/.test(w))
+                uni.set(w, (uni.get(w) || 0) + 1);
+        });
+        for (let i = 0; i < words.length - 2; i++) {
+            const g = words.slice(i, i + 3).join(" ");
+            tri.set(g, (tri.get(g) || 0) + 1);
+        }
+        for (let i = 0; i < words.length - 3; i++) {
+            const g = words.slice(i, i + 4).join(" ");
+            quad.set(g, (quad.get(g) || 0) + 1);
+        }
+        for (const raw of msg.match(/[^.!?\n]+[.!?]*/g) || []) {
+            const s = raw.trim();
+            if (!s)
+                continue;
+            sent++;
+            const sw = tokenize(s);
+            if (sw.length)
+                sentLens.push(sw.length);
+            if (/\?\s*$/.test(s))
+                q++;
+            const low = " " + s.toLowerCase() + " ";
+            if (HEDGE.some((h) => low.includes(h)))
+                hedge++;
+            if (sw[0] && DIRECTIVE.has(sw[0]))
+                dir++;
+            if (low.includes(" please ") || low.includes(" thanks") || low.includes("thank you"))
+                please++;
+        }
+    }
+    if (!sent)
+        return { signals: [], words: [], pack: "", prompts: messages.length };
+    const minP = Math.max(3, Math.round(messages.length * 0.01));
+    const rate = (n) => n / sent;
+    // distinctive repeated phrases — rank by frequency (tiebreak longer); collapse
+    // overlapping windows of the same phrase by shared-token overlap, not just substring.
+    const phrases = [];
+    const keptTokens = [];
+    for (const [g, c] of [...quad.entries(), ...tri.entries()]
+        .filter(([g, c]) => c >= minP && !allStop(g))
+        .sort((a, b) => b[1] - a[1] || b[0].length - a[0].length)) {
+        const gt = g.split(" ");
+        if (keptTokens.some((k) => gt.filter((w) => k.has(w)).length >= 2))
+            continue; // same phrase, different window
+        phrases.push({ phrase: g, count: c });
+        keptTokens.push(new Set(gt));
+        if (phrases.length >= 8)
+            break;
+    }
+    const signals = [];
+    const med = median(sentLens);
+    const add = (dimension, pattern, polarity, confidence, evidence) => signals.push({ dimension, pattern, polarity, confidence: Math.round(confidence * 100) / 100, evidence, basis: "stylometry" });
+    // tone constants
+    if (med <= 11)
+        add("voice", "terse — short, declarative sentences", "does", 0.85, `median ${med} words/sentence`);
+    else if (med >= 18)
+        add("voice", "writes in long, detailed sentences", "does", 0.8, `median ${med} words/sentence`);
+    if (rate(dir) > 0.15)
+        add("voice", "leads with imperatives, minimal preamble", "does", 0.75, `${Math.round(rate(dir) * 100)}% of sentences open with a command verb`);
+    if (rate(hedge) < 0.05)
+        add("voice", "states things flatly; rarely hedges", "does", 0.8, `hedging in ${Math.round(rate(hedge) * 100)}% of sentences`);
+    if (emoji / messages.length < 0.02)
+        add("format", "no emoji", "avoids", 0.7, `${emoji} emoji across ${messages.length} prompts`);
+    if (lowerI > upperI * 1.3)
+        add("format", "casual register — lowercases “i”", "does", 0.7, `“i” ${lowerI}× vs “I” ${upperI}×`);
+    if (please < messages.length * 0.05)
+        add("voice", "skips pleasantries", "does", 0.6, `${please} please/thanks across ${messages.length} prompts`);
+    if (bulletLines > messages.length * 0.25)
+        add("format", "structures answers with bullet points", "prefers", 0.65, `${bulletLines} bulleted lines`);
+    // recurring phrasing → emphasis (these tend to be the user's repeated instructions/values)
+    for (const { phrase, count } of phrases)
+        add("emphasis", phrase, "insists", Math.min(0.9, 0.5 + count / (minP * 6)), `${count}×`);
+    const words = [...uni.entries()].filter(([, c]) => c >= minP).sort((a, b) => b[1] - a[1]).slice(0, 18).map(([word, count]) => ({ word, count }));
+    return { signals, words, pack: assemblePack(signals), prompts: messages.length };
+}
+/** Build the system-prompt-ready "voice" line from style signals (shared by import + serving). */
+export function assemblePack(signals) {
+    const tone = signals.filter((s) => s.dimension !== "emphasis").map((s) => s.pattern);
+    const phrases = signals.filter((s) => s.dimension === "emphasis").map((s) => `“${s.pattern}”`);
+    if (!tone.length && !phrases.length)
+        return "";
+    const parts = [...tone];
+    if (phrases.length)
+        parts.push(`recurring phrasing: ${phrases.slice(0, 5).join(", ")}`);
+    return `Write like this user: ${parts.join("; ")}.`;
+}

package/package.json CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "persnally",
-  "version": "2.1.0",
+  "version": "2.3.0",
   "license": "FSL-1.1-MIT",
   "description": "Your own context engine — local-first, across every AI. So every AI finally knows you.",
   "type": "module",