persnally 2.0.3 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -59,6 +59,29 @@ export declare const PAYLOAD_SCHEMAS: {
59
59
  proficiency: z.ZodNumber;
60
60
  basis: z.ZodString;
61
61
  }, z.core.$strip>;
62
+ readonly "signal.style": z.ZodObject<{
63
+ dimension: z.ZodEnum<{
64
+ format: "format";
65
+ voice: "voice";
66
+ convention: "convention";
67
+ emphasis: "emphasis";
68
+ workflow: "workflow";
69
+ }>;
70
+ pattern: z.ZodString;
71
+ polarity: z.ZodEnum<{
72
+ does: "does";
73
+ avoids: "avoids";
74
+ prefers: "prefers";
75
+ insists: "insists";
76
+ }>;
77
+ confidence: z.ZodNumber;
78
+ evidence: z.ZodString;
79
+ basis: z.ZodEnum<{
80
+ observed: "observed";
81
+ stylometry: "stylometry";
82
+ correction: "correction";
83
+ }>;
84
+ }, z.core.$strip>;
62
85
  readonly "context.read": z.ZodObject<{
63
86
  scope: z.ZodString;
64
87
  client_purpose: z.ZodString;
@@ -134,6 +157,7 @@ export declare const eventSchema: z.ZodObject<{
134
157
  "signal.topic": "signal.topic";
135
158
  "signal.assertion": "signal.assertion";
136
159
  "signal.skill": "signal.skill";
160
+ "signal.style": "signal.style";
137
161
  "context.read": "context.read";
138
162
  "agent.question": "agent.question";
139
163
  "agent.answer": "agent.answer";
@@ -33,6 +33,16 @@ export const PAYLOAD_SCHEMAS = {
33
33
  proficiency: z.number().min(0).max(1),
34
34
  basis: z.string(),
35
35
  }),
36
+ // How the user writes/works — the prescriptive layer (docs/CONTEXT_DEPTH.md).
37
+ // Structured so it dedupes by `pattern` and consolidates into stable constants.
38
+ "signal.style": z.object({
39
+ dimension: z.enum(["voice", "convention", "emphasis", "format", "workflow"]),
40
+ pattern: z.string().min(1),
41
+ polarity: z.enum(["does", "avoids", "prefers", "insists"]),
42
+ confidence: z.number().min(0).max(1),
43
+ evidence: z.string(),
44
+ basis: z.enum(["observed", "stylometry", "correction"]),
45
+ }),
36
46
  "context.read": z.object({
37
47
  scope: z.string(),
38
48
  client_purpose: z.string(),
@@ -5,16 +5,23 @@
5
5
  import { z } from "zod";
6
6
  import { newEvent, safeIso, uuidv7, PAYLOAD_SCHEMAS } from "../events.js";
7
7
  import { anthropicExtract, DEFAULT_EXTRACT_MODEL } from "../llm.js";
8
+ import { proseLines, stripNoise } from "../prose.js";
9
+ import { analyzeVoice } from "../stylometry.js";
8
10
  const MAX_CONVO_CHARS = 30_000;
9
11
  const topicsExtraction = z.object({ topics: z.array(PAYLOAD_SCHEMAS["signal.topic"]) });
10
12
  const assertionsExtraction = z.object({ assertions: z.array(PAYLOAD_SCHEMAS["signal.assertion"]) });
11
13
  export async function extractEvents(parsed, opts, extract = anthropicExtract, model = DEFAULT_EXTRACT_MODEL) {
12
14
  const batch = uuidv7();
13
15
  const events = [];
16
+ const voiceCorpus = []; // clean prose for the deterministic voice fingerprint
14
17
  for (const convo of parsed.conversations) {
15
18
  if (!convo.userMessages.length)
16
19
  continue;
17
- const text = convo.userMessages.join("\n").slice(0, MAX_CONVO_CHARS);
20
+ const joined = convo.userMessages.join("\n");
21
+ voiceCorpus.push(...proseLines(joined));
22
+ const text = stripNoise(joined).slice(0, MAX_CONVO_CHARS); // strip pasted paths/URLs/logs before the LLM sees it
23
+ if (!text)
24
+ continue;
18
25
  const result = await extract({
19
26
  model,
20
27
  instruction: "Extract 1-5 topic signals from this conversation's user messages. Weight = centrality, depth = engagement level, sentiment = user's attitude toward the topic. Capture decisions and rejected options as their own signals.",
@@ -42,6 +49,10 @@ export async function extractEvents(parsed, opts, extract = anthropicExtract, mo
42
49
  events.push(newEvent("signal.assertion", opts.source, a, { kind: "import", batch, file: opts.file }));
43
50
  }
44
51
  }
52
+ // Deterministic voice fingerprint over the user's own prose — no LLM, no tokens.
53
+ for (const s of analyzeVoice(voiceCorpus).signals) {
54
+ events.push(newEvent("signal.style", opts.source, s, { kind: "import", batch, file: opts.file }));
55
+ }
45
56
  const span = parsed.conversations.map((c) => c.created_at).sort();
46
57
  events.push(newEvent("system.import", "system", {
47
58
  importer: opts.importer,
@@ -49,38 +49,51 @@ async function recordRead(scope, purpose, items) {
49
49
  }
50
50
  }
51
51
  // ── persnally_track — write path ────────────────────────────
52
- server.tool("persnally_track", `Track topics and interests from the current conversation to build the user's personal context.
52
+ const TOPIC_SCHEMA = z.object({
53
+ topic: z.string().describe("The topic, decision, or preference (e.g. 'Rust async programming', 'chose SQLite over Postgres')"),
54
+ weight: z.number().min(0).max(1),
55
+ intent: z.enum(["learning", "building", "researching", "deciding", "discussing", "debugging"]),
56
+ sentiment: z.enum(["positive", "negative", "neutral"]),
57
+ depth: z.enum(["mention", "moderate", "deep"]),
58
+ category: z.enum(["technology", "business", "finance", "career", "health", "science", "creative", "education", "lifestyle", "news", "other"]),
59
+ entities: z.array(z.string()),
60
+ });
61
+ const STYLE_SCHEMA = z.object({
62
+ dimension: z.enum(["voice", "convention", "emphasis", "format", "workflow"])
63
+ .describe("voice=tone/phrasing; convention=tools/rules; emphasis=what they insist on; format=structure; workflow=how they work"),
64
+ pattern: z.string().min(1).describe("a short, reusable instruction — e.g. 'prefers pnpm over npm', 'wants the falsification first', 'terse, no filler'"),
65
+ polarity: z.enum(["does", "avoids", "prefers", "insists"]),
66
+ confidence: z.number().min(0).max(1).default(0.6),
67
+ evidence: z.string().default("").describe("a brief quote or why you believe it"),
68
+ });
69
+ server.tool("persnally_track", `Track what builds the user's lasting context. Two kinds of signal, both optional — send whichever this conversation produced.
53
70
 
54
- Call this when the user discusses topics they care about and when they make a decision, accept or reject an option, or express a clear preference, capture that as its own signal rather than folding it into a broader topic.
71
+ TOPICS — what they're engaged with (interests, decisions, accepted/rejected options).
72
+ - 1-5 per conversation; weight = centrality (0.1 brief … 1.0 main focus); depth = mention|moderate|deep; sentiment 'negative' deprioritizes; entities are specific names ("Next.js", not "web framework").
55
73
 
56
- GUIDELINES:
57
- - Extract 1-5 signals per conversation, focused on what the user is ACTIVELY engaged with
58
- - Weight = how central to the conversation (0.1 brief, 1.0 main focus)
59
- - Depth: "mention" | "moderate" | "deep" (extensive discussion or problem-solving)
60
- - Sentiment: "negative" means frustration or dislike (deprioritizes, never boosts)
61
- - Entities are specific names: "Next.js" not "web framework"
74
+ STYLE — HOW they write and work, so every AI can answer like them. High value, but easy to over-send: record only a CLEAR, REPEATED tell, never a one-off, at most 1-3 per conversation. Examples:
75
+ - voice: "terse, no filler" · convention: "prefers pnpm over npm", "no default exports" · emphasis: "wants the falsification first" · format: "answers in bullet points" · workflow: "kills ideas fast".
76
+ - Skip anything generic or already obvious. When unsure, don't.
62
77
 
63
- The user opted in. Only structured signals are stored, locally, never raw messages.`, {
64
- topics: z.array(z.object({
65
- topic: z.string().describe("The topic, decision, or preference (e.g. 'Rust async programming', 'chose SQLite over Postgres')"),
66
- weight: z.number().min(0).max(1),
67
- intent: z.enum(["learning", "building", "researching", "deciding", "discussing", "debugging"]),
68
- sentiment: z.enum(["positive", "negative", "neutral"]),
69
- depth: z.enum(["mention", "moderate", "deep"]),
70
- category: z.enum(["technology", "business", "finance", "career", "health", "science", "creative", "education", "lifestyle", "news", "other"]),
71
- entities: z.array(z.string()),
72
- })).min(1),
73
- }, async ({ topics }) => guarded(async () => {
74
- logEvent("tool_call", { tool: "persnally_track", topics: topics.length });
78
+ The user opted in. Only these structured signals are stored, locally, never raw messages.`, {
79
+ topics: z.array(TOPIC_SCHEMA).optional(),
80
+ style: z.array(STYLE_SCHEMA).optional(),
81
+ }, async ({ topics, style }) => guarded(async () => {
82
+ logEvent("tool_call", { tool: "persnally_track", topics: topics?.length ?? 0, style: style?.length ?? 0 });
75
83
  const client = clientSlug();
76
- const events = topics.map((t) => ({
77
- type: "signal.topic",
78
- source: `mcp:${client}`,
79
- payload: t,
80
- provenance: { kind: "mcp", client },
81
- }));
84
+ const events = [
85
+ ...(topics ?? []).map((t) => ({ type: "signal.topic", source: `mcp:${client}`, payload: t, provenance: { kind: "mcp", client } })),
86
+ ...(style ?? []).map((s) => ({ type: "signal.style", source: `mcp:${client}`, payload: { ...s, basis: "observed" }, provenance: { kind: "mcp", client } })),
87
+ ];
88
+ if (!events.length)
89
+ return text("Nothing to track — pass topics and/or style signals.");
82
90
  await daemonPost("/events", events);
83
- return text(`Recorded ${topics.length} signal(s): ${topics.map((t) => t.topic).join(", ")}.`);
91
+ const parts = [];
92
+ if (topics?.length)
93
+ parts.push(`${topics.length} topic(s): ${topics.map((t) => t.topic).join(", ")}`);
94
+ if (style?.length)
95
+ parts.push(`${style.length} style signal(s)`);
96
+ return text(`Recorded ${parts.join(" · ")}.`);
84
97
  }));
85
98
  // ── persnally_context — read path (the Phase 2 core) ────────
86
99
  server.tool("persnally_context", `Get the user's personal context: who they are, what they're working on, and their current interests.
@@ -91,11 +104,12 @@ Call this at the START of a conversation (or when personalization would improve
91
104
  }, async ({ detail, purpose }) => guarded(async () => {
92
105
  logEvent("tool_call", { tool: "persnally_context", detail });
93
106
  const client = encodeURIComponent(getClient());
94
- const [profile, topics] = await Promise.all([
107
+ const [profile, topics, voice] = await Promise.all([
95
108
  daemonGet(`/profile?client=${client}`),
96
109
  daemonGet(`/topics?limit=${detail === "full" ? 25 : 10}&client=${client}`),
110
+ daemonGet("/voice"),
97
111
  ]);
98
- if (!profile && !topics?.length) {
112
+ if (!profile && !topics?.length && !voice?.pack) {
99
113
  return text("No context yet — the user hasn't imported data or tracked any signals.");
100
114
  }
101
115
  let out = "";
@@ -106,6 +120,11 @@ Call this at the START of a conversation (or when personalization would improve
106
120
  items += sections.length;
107
121
  out += sections.map((s) => `## ${s.title}\n${s.body}`).join("\n\n");
108
122
  }
123
+ // The prescriptive layer: how to write/answer so it fits this user, not a generic one.
124
+ if (voice?.pack) {
125
+ out += `${out ? "\n\n" : ""}# How to write for this user\n${voice.pack}`;
126
+ items += voice.items?.length ?? 0;
127
+ }
109
128
  if (topics?.length) {
110
129
  out += `\n\n# Current interests (decay-weighted)\n`;
111
130
  out += topics.map((t) => `- ${t.topic} (${t.category}, ${t.dominant_intent}, weight ${t.weight.toFixed(2)})`).join("\n");
@@ -0,0 +1,10 @@
1
+ /**
2
+ * Corpus hygiene. Imported prompts are polluted with pasted data (file paths,
3
+ * URLs, JSON/logs) and injected blocks (task notifications, reminders, command
4
+ * palettes, tool output). Unfiltered, that noise swamps both topic extraction
5
+ * and the voice fingerprint. See docs/CONTEXT_DEPTH.md.
6
+ */
7
+ /** Remove injected blocks, fenced code, URLs, and filesystem paths. Keeps prose intact. */
8
+ export declare function stripNoise(text: string): string;
9
+ /** Strict: only the prose lines a human actually wrote — for stylometry. */
10
+ export declare function proseLines(text: string): string[];
@@ -0,0 +1,34 @@
1
+ /**
2
+ * Corpus hygiene. Imported prompts are polluted with pasted data (file paths,
3
+ * URLs, JSON/logs) and injected blocks (task notifications, reminders, command
4
+ * palettes, tool output). Unfiltered, that noise swamps both topic extraction
5
+ * and the voice fingerprint. See docs/CONTEXT_DEPTH.md.
6
+ */
7
+ // A line with at least one of these reads as a sentence, not pasted data.
8
+ const FUNCTION_WORD = /\b(the|a|an|i|to|and|is|it|you|we|that|this|of|for|in|on|do|are|be|can|should|need|want|make|how|what|why|let|so|but|not|just|with|like|now|also|when|if|because|about)\b/;
9
+ /** Remove injected blocks, fenced code, URLs, and filesystem paths. Keeps prose intact. */
10
+ export function stripNoise(text) {
11
+ return text
12
+ .replace(/```[\s\S]*?```/g, " ")
13
+ .replace(/<(?:task-notification|system-reminder|local-command[^>]*|command-[^>]*)>[\s\S]*?<\/[^>]+>/gi, " ")
14
+ .replace(/<\/?[a-z][^>]*>/gi, " ")
15
+ .replace(/https?:\/\/\S+/g, " ")
16
+ .replace(/(?:[~\w.\-]+)?(?:\/[\w.\-]+){2,}\/?/g, " ") // /a/b style paths
17
+ .replace(/[ \t]{2,}/g, " ")
18
+ .replace(/\n{3,}/g, "\n\n")
19
+ .trim();
20
+ }
21
+ /** Strict: only the prose lines a human actually wrote — for stylometry. */
22
+ export function proseLines(text) {
23
+ return stripNoise(text)
24
+ .split("\n")
25
+ .map((l) => l.trim())
26
+ .filter((ln) => {
27
+ if (ln.split(/\s+/).length < 2)
28
+ return false;
29
+ const letters = (ln.match(/[a-zA-Z]/g) || []).length;
30
+ if (!ln.length || letters / ln.length < 0.6)
31
+ return false; // json/logs/ids
32
+ return FUNCTION_WORD.test(" " + ln.toLowerCase() + " ");
33
+ });
34
+ }
@@ -3,6 +3,7 @@
3
3
  * Single source of truth per docs/EVENT_SCHEMA.md; views can always be re-derived.
4
4
  */
5
5
  import { type PersnallyEvent } from "./events.js";
6
+ import { type StyleSignal } from "./stylometry.js";
6
7
  export declare const DEFAULT_DB_PATH: string;
7
8
  export interface QueryOpts {
8
9
  type?: string;
@@ -53,6 +54,13 @@ export declare class EventStore {
53
54
  rebuild(now?: number): void;
54
55
  saveProfile(p: StoredProfile): void;
55
56
  getProfile(): StoredProfile | null;
57
+ /** The voice/convention profile — style signals deduped by pattern (newest wins), richest first. */
58
+ voice(): {
59
+ pack: string;
60
+ items: StyleSignal[];
61
+ };
62
+ /** Drops style signals of one basis so a deterministic re-run replaces them (live `observed`/`correction` signals are kept). */
63
+ clearStyleByBasis(basis: string): number;
56
64
  /** Hard-deletes matching topic events plus derived events referencing them, then rebuilds. */
57
65
  forgetTopic(topic: string): number;
58
66
  /** Removes every event from one import batch — a bad import is fully reversible. */
@@ -8,6 +8,7 @@ import { dirname, join } from "node:path";
8
8
  import { topicWeight } from "./decay.js";
9
9
  import { normalizeTopic, validateEvent } from "./events.js";
10
10
  import { DATA_DIR } from "./paths.js";
11
+ import { assemblePack } from "./stylometry.js";
11
12
  const VIEW_SCHEMA_VERSION = 2;
12
13
  export const DEFAULT_DB_PATH = join(DATA_DIR, "persnally.db");
13
14
  export class EventStore {
@@ -195,6 +196,27 @@ export class EventStore {
195
196
  const row = this.db.prepare("SELECT * FROM view_profile WHERE id = 1").get();
196
197
  return row ? { ...row, sections: JSON.parse(row.sections) } : null;
197
198
  }
199
+ /** The voice/convention profile — style signals deduped by pattern (newest wins), richest first. */
200
+ voice() {
201
+ const byPattern = new Map();
202
+ // query() returns ts DESC, so the first occurrence of a pattern is the most recent.
203
+ for (const e of this.query({ type: "signal.style", limit: 1_000_000 })) {
204
+ const p = e.payload;
205
+ const key = `${p.dimension}|${p.pattern.toLowerCase()}`;
206
+ if (!byPattern.has(key))
207
+ byPattern.set(key, p);
208
+ }
209
+ // Cap the served set: live `observed` signals accrue over time, so bound it
210
+ // to the richest few (consolidation distills further in Slice 3).
211
+ const items = [...byPattern.values()].sort((a, b) => b.confidence - a.confidence).slice(0, 28);
212
+ return { pack: assemblePack(items), items };
213
+ }
214
+ /** Drops style signals of one basis so a deterministic re-run replaces them (live `observed`/`correction` signals are kept). */
215
+ clearStyleByBasis(basis) {
216
+ return this.db
217
+ .prepare("DELETE FROM events WHERE type = 'signal.style' AND json_extract(payload, '$.basis') = ?")
218
+ .run(basis).changes;
219
+ }
198
220
  /** Hard-deletes matching topic events plus derived events referencing them, then rebuilds. */
199
221
  forgetTopic(topic) {
200
222
  const key = normalizeTopic(topic);
@@ -0,0 +1,21 @@
1
+ /**
2
+ * Deterministic voice fingerprint — no LLM, no tokens, nothing leaves the machine.
3
+ * Turns the user's own prose (already noise-filtered via prose.ts) into structured
4
+ * signal.style payloads + a prescriptive "voice" pack. See docs/CONTEXT_DEPTH.md.
5
+ */
6
+ import { z } from "zod";
7
+ import { PAYLOAD_SCHEMAS } from "./events.js";
8
+ export type StyleSignal = z.infer<(typeof PAYLOAD_SCHEMAS)["signal.style"]>;
9
+ export interface VoiceProfile {
10
+ signals: StyleSignal[];
11
+ words: {
12
+ word: string;
13
+ count: number;
14
+ }[];
15
+ pack: string;
16
+ prompts: number;
17
+ }
18
+ /** Compute a voice profile from prose messages (each may be multi-line). */
19
+ export declare function analyzeVoice(messages: string[]): VoiceProfile;
20
+ /** Build the system-prompt-ready "voice" line from style signals (shared by import + serving). */
21
+ export declare function assemblePack(signals: StyleSignal[]): string;
@@ -0,0 +1,124 @@
1
+ /**
2
+ * Deterministic voice fingerprint — no LLM, no tokens, nothing leaves the machine.
3
+ * Turns the user's own prose (already noise-filtered via prose.ts) into structured
4
+ * signal.style payloads + a prescriptive "voice" pack. See docs/CONTEXT_DEPTH.md.
5
+ */
6
+ const STOP = new Set(("a an the and or but if then so of to in on for with at by from as is are was were be been being this that these those it its i you he she we they me my your our their them us do does did done have has had having will would can could should may might must not no yes what which who when where why how all any both each few more most other some such only own same than too very just about into over after before above below up down out off again once here there im ive youre were theyre lets")
7
+ .split(/\s+/));
8
+ const DIRECTIVE = new Set("make fix add remove create give check use keep build write update ensure confirm let lets do run show change implement refactor delete set move find get take generate review test verify explain tell help put start stop send pull push merge commit"
9
+ .split(" "));
10
+ const HEDGE = ["maybe", "i think", "probably", "perhaps", "kind of", "sort of", "i guess", "might be", "not sure", "i feel"];
11
+ const EMOJI = /\p{Extended_Pictographic}/gu;
12
+ const tokenize = (s) => s.toLowerCase().match(/[a-z0-9][a-z0-9']*/g) || [];
13
+ const median = (xs) => {
14
+ if (!xs.length)
15
+ return 0;
16
+ const s = [...xs].sort((a, b) => a - b), m = s.length >> 1;
17
+ return s.length % 2 ? s[m] : (s[m - 1] + s[m]) / 2;
18
+ };
19
+ const allStop = (g) => g.split(" ").every((w) => STOP.has(w) || /^\d+$/.test(w));
20
+ /** Compute a voice profile from prose messages (each may be multi-line). */
21
+ export function analyzeVoice(messages) {
22
+ if (!messages.length)
23
+ return { signals: [], words: [], pack: "", prompts: 0 };
24
+ const uni = new Map(), tri = new Map(), quad = new Map();
25
+ const sentLens = [];
26
+ const wordSet = new Set();
27
+ let total = 0, sent = 0, q = 0, dir = 0, hedge = 0, emoji = 0, lowerI = 0, upperI = 0, please = 0, bulletLines = 0;
28
+ for (const msg of messages) {
29
+ emoji += (msg.match(EMOJI) || []).length;
30
+ lowerI += (msg.match(/(?:^|\s)i(?:'|\s|$)/g) || []).length;
31
+ upperI += (msg.match(/(?:^|\s)I(?:'|\s|$)/g) || []).length;
32
+ for (const ln of msg.split("\n"))
33
+ if (/^\s*[-*•]\s/.test(ln))
34
+ bulletLines++;
35
+ const words = tokenize(msg);
36
+ total += words.length;
37
+ words.forEach((w) => {
38
+ wordSet.add(w);
39
+ if (!STOP.has(w) && w.length >= 4 && !/^\d+$/.test(w))
40
+ uni.set(w, (uni.get(w) || 0) + 1);
41
+ });
42
+ for (let i = 0; i < words.length - 2; i++) {
43
+ const g = words.slice(i, i + 3).join(" ");
44
+ tri.set(g, (tri.get(g) || 0) + 1);
45
+ }
46
+ for (let i = 0; i < words.length - 3; i++) {
47
+ const g = words.slice(i, i + 4).join(" ");
48
+ quad.set(g, (quad.get(g) || 0) + 1);
49
+ }
50
+ for (const raw of msg.match(/[^.!?\n]+[.!?]*/g) || []) {
51
+ const s = raw.trim();
52
+ if (!s)
53
+ continue;
54
+ sent++;
55
+ const sw = tokenize(s);
56
+ if (sw.length)
57
+ sentLens.push(sw.length);
58
+ if (/\?\s*$/.test(s))
59
+ q++;
60
+ const low = " " + s.toLowerCase() + " ";
61
+ if (HEDGE.some((h) => low.includes(h)))
62
+ hedge++;
63
+ if (sw[0] && DIRECTIVE.has(sw[0]))
64
+ dir++;
65
+ if (low.includes(" please ") || low.includes(" thanks") || low.includes("thank you"))
66
+ please++;
67
+ }
68
+ }
69
+ if (!sent)
70
+ return { signals: [], words: [], pack: "", prompts: messages.length };
71
+ const minP = Math.max(3, Math.round(messages.length * 0.01));
72
+ const rate = (n) => n / sent;
73
+ // distinctive repeated phrases — rank by frequency (tiebreak longer); collapse
74
+ // overlapping windows of the same phrase by shared-token overlap, not just substring.
75
+ const phrases = [];
76
+ const keptTokens = [];
77
+ for (const [g, c] of [...quad.entries(), ...tri.entries()]
78
+ .filter(([g, c]) => c >= minP && !allStop(g))
79
+ .sort((a, b) => b[1] - a[1] || b[0].length - a[0].length)) {
80
+ const gt = g.split(" ");
81
+ if (keptTokens.some((k) => gt.filter((w) => k.has(w)).length >= 2))
82
+ continue; // same phrase, different window
83
+ phrases.push({ phrase: g, count: c });
84
+ keptTokens.push(new Set(gt));
85
+ if (phrases.length >= 8)
86
+ break;
87
+ }
88
+ const signals = [];
89
+ const med = median(sentLens);
90
+ const add = (dimension, pattern, polarity, confidence, evidence) => signals.push({ dimension, pattern, polarity, confidence: Math.round(confidence * 100) / 100, evidence, basis: "stylometry" });
91
+ // tone constants
92
+ if (med <= 11)
93
+ add("voice", "terse — short, declarative sentences", "does", 0.85, `median ${med} words/sentence`);
94
+ else if (med >= 18)
95
+ add("voice", "writes in long, detailed sentences", "does", 0.8, `median ${med} words/sentence`);
96
+ if (rate(dir) > 0.15)
97
+ add("voice", "leads with imperatives, minimal preamble", "does", 0.75, `${Math.round(rate(dir) * 100)}% of sentences open with a command verb`);
98
+ if (rate(hedge) < 0.05)
99
+ add("voice", "states things flatly; rarely hedges", "does", 0.8, `hedging in ${Math.round(rate(hedge) * 100)}% of sentences`);
100
+ if (emoji / messages.length < 0.02)
101
+ add("format", "no emoji", "avoids", 0.7, `${emoji} emoji across ${messages.length} prompts`);
102
+ if (lowerI > upperI * 1.3)
103
+ add("format", "casual register — lowercases “i”", "does", 0.7, `“i” ${lowerI}× vs “I” ${upperI}×`);
104
+ if (please < messages.length * 0.05)
105
+ add("voice", "skips pleasantries", "does", 0.6, `${please} please/thanks across ${messages.length} prompts`);
106
+ if (bulletLines > messages.length * 0.25)
107
+ add("format", "structures answers with bullet points", "prefers", 0.65, `${bulletLines} bulleted lines`);
108
+ // recurring phrasing → emphasis (these tend to be the user's repeated instructions/values)
109
+ for (const { phrase, count } of phrases)
110
+ add("emphasis", phrase, "insists", Math.min(0.9, 0.5 + count / (minP * 6)), `${count}×`);
111
+ const words = [...uni.entries()].filter(([, c]) => c >= minP).sort((a, b) => b[1] - a[1]).slice(0, 18).map(([word, count]) => ({ word, count }));
112
+ return { signals, words, pack: assemblePack(signals), prompts: messages.length };
113
+ }
114
+ /** Build the system-prompt-ready "voice" line from style signals (shared by import + serving). */
115
+ export function assemblePack(signals) {
116
+ const tone = signals.filter((s) => s.dimension !== "emphasis").map((s) => s.pattern);
117
+ const phrases = signals.filter((s) => s.dimension === "emphasis").map((s) => `“${s.pattern}”`);
118
+ if (!tone.length && !phrases.length)
119
+ return "";
120
+ const parts = [...tone];
121
+ if (phrases.length)
122
+ parts.push(`recurring phrasing: ${phrases.slice(0, 5).join(", ")}`);
123
+ return `Write like this user: ${parts.join("; ")}.`;
124
+ }
package/package.json CHANGED
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "name": "persnally",
3
- "version": "2.0.3",
3
+ "version": "2.2.0",
4
4
  "license": "FSL-1.1-MIT",
5
- "description": "The context engine for you — local-first, across every AI. So every AI finally knows you.",
5
+ "description": "Your own context engine — local-first, across every AI. So every AI finally knows you.",
6
6
  "type": "module",
7
7
  "bin": {
8
8
  "persnally": "build/src/cli.js",