persnally 2.1.0 → 2.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/src/cli.js +25 -2
- package/build/src/consolidate.d.ts +1 -0
- package/build/src/consolidate.js +5 -1
- package/build/src/daemon.js +11 -1
- package/build/src/dashboard.html +603 -137
- package/build/src/events.d.ts +24 -0
- package/build/src/events.js +10 -0
- package/build/src/importers/extract.js +12 -1
- package/build/src/mcp/index.js +60 -33
- package/build/src/prose.d.ts +10 -0
- package/build/src/prose.js +34 -0
- package/build/src/setup.js +6 -1
- package/build/src/store.d.ts +23 -0
- package/build/src/store.js +77 -1
- package/build/src/stylometry.d.ts +21 -0
- package/build/src/stylometry.js +124 -0
- package/package.json +1 -1
package/build/src/events.d.ts
CHANGED
|
@@ -59,6 +59,29 @@ export declare const PAYLOAD_SCHEMAS: {
|
|
|
59
59
|
proficiency: z.ZodNumber;
|
|
60
60
|
basis: z.ZodString;
|
|
61
61
|
}, z.core.$strip>;
|
|
62
|
+
readonly "signal.style": z.ZodObject<{
|
|
63
|
+
dimension: z.ZodEnum<{
|
|
64
|
+
format: "format";
|
|
65
|
+
voice: "voice";
|
|
66
|
+
convention: "convention";
|
|
67
|
+
emphasis: "emphasis";
|
|
68
|
+
workflow: "workflow";
|
|
69
|
+
}>;
|
|
70
|
+
pattern: z.ZodString;
|
|
71
|
+
polarity: z.ZodEnum<{
|
|
72
|
+
does: "does";
|
|
73
|
+
avoids: "avoids";
|
|
74
|
+
prefers: "prefers";
|
|
75
|
+
insists: "insists";
|
|
76
|
+
}>;
|
|
77
|
+
confidence: z.ZodNumber;
|
|
78
|
+
evidence: z.ZodString;
|
|
79
|
+
basis: z.ZodEnum<{
|
|
80
|
+
observed: "observed";
|
|
81
|
+
stylometry: "stylometry";
|
|
82
|
+
correction: "correction";
|
|
83
|
+
}>;
|
|
84
|
+
}, z.core.$strip>;
|
|
62
85
|
readonly "context.read": z.ZodObject<{
|
|
63
86
|
scope: z.ZodString;
|
|
64
87
|
client_purpose: z.ZodString;
|
|
@@ -134,6 +157,7 @@ export declare const eventSchema: z.ZodObject<{
|
|
|
134
157
|
"signal.topic": "signal.topic";
|
|
135
158
|
"signal.assertion": "signal.assertion";
|
|
136
159
|
"signal.skill": "signal.skill";
|
|
160
|
+
"signal.style": "signal.style";
|
|
137
161
|
"context.read": "context.read";
|
|
138
162
|
"agent.question": "agent.question";
|
|
139
163
|
"agent.answer": "agent.answer";
|
package/build/src/events.js
CHANGED
|
@@ -33,6 +33,16 @@ export const PAYLOAD_SCHEMAS = {
|
|
|
33
33
|
proficiency: z.number().min(0).max(1),
|
|
34
34
|
basis: z.string(),
|
|
35
35
|
}),
|
|
36
|
+
// How the user writes/works — the prescriptive layer (docs/CONTEXT_DEPTH.md).
|
|
37
|
+
// Structured so it dedupes by `pattern` and consolidates into stable constants.
|
|
38
|
+
"signal.style": z.object({
|
|
39
|
+
dimension: z.enum(["voice", "convention", "emphasis", "format", "workflow"]),
|
|
40
|
+
pattern: z.string().min(1),
|
|
41
|
+
polarity: z.enum(["does", "avoids", "prefers", "insists"]),
|
|
42
|
+
confidence: z.number().min(0).max(1),
|
|
43
|
+
evidence: z.string(),
|
|
44
|
+
basis: z.enum(["observed", "stylometry", "correction"]),
|
|
45
|
+
}),
|
|
36
46
|
"context.read": z.object({
|
|
37
47
|
scope: z.string(),
|
|
38
48
|
client_purpose: z.string(),
|
|
@@ -5,16 +5,23 @@
|
|
|
5
5
|
import { z } from "zod";
|
|
6
6
|
import { newEvent, safeIso, uuidv7, PAYLOAD_SCHEMAS } from "../events.js";
|
|
7
7
|
import { anthropicExtract, DEFAULT_EXTRACT_MODEL } from "../llm.js";
|
|
8
|
+
import { proseLines, stripNoise } from "../prose.js";
|
|
9
|
+
import { analyzeVoice } from "../stylometry.js";
|
|
8
10
|
const MAX_CONVO_CHARS = 30_000;
|
|
9
11
|
const topicsExtraction = z.object({ topics: z.array(PAYLOAD_SCHEMAS["signal.topic"]) });
|
|
10
12
|
const assertionsExtraction = z.object({ assertions: z.array(PAYLOAD_SCHEMAS["signal.assertion"]) });
|
|
11
13
|
export async function extractEvents(parsed, opts, extract = anthropicExtract, model = DEFAULT_EXTRACT_MODEL) {
|
|
12
14
|
const batch = uuidv7();
|
|
13
15
|
const events = [];
|
|
16
|
+
const voiceCorpus = []; // clean prose for the deterministic voice fingerprint
|
|
14
17
|
for (const convo of parsed.conversations) {
|
|
15
18
|
if (!convo.userMessages.length)
|
|
16
19
|
continue;
|
|
17
|
-
const
|
|
20
|
+
const joined = convo.userMessages.join("\n");
|
|
21
|
+
voiceCorpus.push(...proseLines(joined));
|
|
22
|
+
const text = stripNoise(joined).slice(0, MAX_CONVO_CHARS); // strip pasted paths/URLs/logs before the LLM sees it
|
|
23
|
+
if (!text)
|
|
24
|
+
continue;
|
|
18
25
|
const result = await extract({
|
|
19
26
|
model,
|
|
20
27
|
instruction: "Extract 1-5 topic signals from this conversation's user messages. Weight = centrality, depth = engagement level, sentiment = user's attitude toward the topic. Capture decisions and rejected options as their own signals.",
|
|
@@ -42,6 +49,10 @@ export async function extractEvents(parsed, opts, extract = anthropicExtract, mo
|
|
|
42
49
|
events.push(newEvent("signal.assertion", opts.source, a, { kind: "import", batch, file: opts.file }));
|
|
43
50
|
}
|
|
44
51
|
}
|
|
52
|
+
// Deterministic voice fingerprint over the user's own prose — no LLM, no tokens.
|
|
53
|
+
for (const s of analyzeVoice(voiceCorpus).signals) {
|
|
54
|
+
events.push(newEvent("signal.style", opts.source, s, { kind: "import", batch, file: opts.file }));
|
|
55
|
+
}
|
|
45
56
|
const span = parsed.conversations.map((c) => c.created_at).sort();
|
|
46
57
|
events.push(newEvent("system.import", "system", {
|
|
47
58
|
importer: opts.importer,
|
package/build/src/mcp/index.js
CHANGED
|
@@ -49,38 +49,51 @@ async function recordRead(scope, purpose, items) {
|
|
|
49
49
|
}
|
|
50
50
|
}
|
|
51
51
|
// ── persnally_track — write path ────────────────────────────
|
|
52
|
-
|
|
52
|
+
const TOPIC_SCHEMA = z.object({
|
|
53
|
+
topic: z.string().describe("The topic, decision, or preference (e.g. 'Rust async programming', 'chose SQLite over Postgres')"),
|
|
54
|
+
weight: z.number().min(0).max(1),
|
|
55
|
+
intent: z.enum(["learning", "building", "researching", "deciding", "discussing", "debugging"]),
|
|
56
|
+
sentiment: z.enum(["positive", "negative", "neutral"]),
|
|
57
|
+
depth: z.enum(["mention", "moderate", "deep"]),
|
|
58
|
+
category: z.enum(["technology", "business", "finance", "career", "health", "science", "creative", "education", "lifestyle", "news", "other"]),
|
|
59
|
+
entities: z.array(z.string()),
|
|
60
|
+
});
|
|
61
|
+
const STYLE_SCHEMA = z.object({
|
|
62
|
+
dimension: z.enum(["voice", "convention", "emphasis", "format", "workflow"])
|
|
63
|
+
.describe("voice=tone/phrasing; convention=tools/rules; emphasis=what they insist on; format=structure; workflow=how they work"),
|
|
64
|
+
pattern: z.string().min(1).describe("a short, reusable instruction — e.g. 'prefers pnpm over npm', 'wants the falsification first', 'terse, no filler'"),
|
|
65
|
+
polarity: z.enum(["does", "avoids", "prefers", "insists"]),
|
|
66
|
+
confidence: z.number().min(0).max(1).default(0.6),
|
|
67
|
+
evidence: z.string().default("").describe("a brief quote or why you believe it"),
|
|
68
|
+
});
|
|
69
|
+
server.tool("persnally_track", `Track what builds the user's lasting context. Two kinds of signal, both optional — send whichever this conversation produced.
|
|
53
70
|
|
|
54
|
-
|
|
71
|
+
TOPICS — what they're engaged with (interests, decisions, accepted/rejected options).
|
|
72
|
+
- 1-5 per conversation; weight = centrality (0.1 brief … 1.0 main focus); depth = mention|moderate|deep; sentiment 'negative' deprioritizes; entities are specific names ("Next.js", not "web framework").
|
|
55
73
|
|
|
56
|
-
|
|
57
|
-
-
|
|
58
|
-
-
|
|
59
|
-
- Depth: "mention" | "moderate" | "deep" (extensive discussion or problem-solving)
|
|
60
|
-
- Sentiment: "negative" means frustration or dislike (deprioritizes, never boosts)
|
|
61
|
-
- Entities are specific names: "Next.js" not "web framework"
|
|
74
|
+
STYLE — HOW they write and work, so every AI can answer like them. High value, but easy to over-send: record only a CLEAR, REPEATED tell, never a one-off, at most 1-3 per conversation. Examples:
|
|
75
|
+
- voice: "terse, no filler" · convention: "prefers pnpm over npm", "no default exports" · emphasis: "wants the falsification first" · format: "answers in bullet points" · workflow: "kills ideas fast".
|
|
76
|
+
- Skip anything generic or already obvious. When unsure, don't.
|
|
62
77
|
|
|
63
|
-
The user opted in. Only structured signals are stored, locally, never raw messages.`, {
|
|
64
|
-
topics: z.array(
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
sentiment: z.enum(["positive", "negative", "neutral"]),
|
|
69
|
-
depth: z.enum(["mention", "moderate", "deep"]),
|
|
70
|
-
category: z.enum(["technology", "business", "finance", "career", "health", "science", "creative", "education", "lifestyle", "news", "other"]),
|
|
71
|
-
entities: z.array(z.string()),
|
|
72
|
-
})).min(1),
|
|
73
|
-
}, async ({ topics }) => guarded(async () => {
|
|
74
|
-
logEvent("tool_call", { tool: "persnally_track", topics: topics.length });
|
|
78
|
+
The user opted in. Only these structured signals are stored, locally, never raw messages.`, {
|
|
79
|
+
topics: z.array(TOPIC_SCHEMA).optional(),
|
|
80
|
+
style: z.array(STYLE_SCHEMA).optional(),
|
|
81
|
+
}, async ({ topics, style }) => guarded(async () => {
|
|
82
|
+
logEvent("tool_call", { tool: "persnally_track", topics: topics?.length ?? 0, style: style?.length ?? 0 });
|
|
75
83
|
const client = clientSlug();
|
|
76
|
-
const events =
|
|
77
|
-
type: "signal.topic",
|
|
78
|
-
source: `mcp:${client}`,
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
84
|
+
const events = [
|
|
85
|
+
...(topics ?? []).map((t) => ({ type: "signal.topic", source: `mcp:${client}`, payload: t, provenance: { kind: "mcp", client } })),
|
|
86
|
+
...(style ?? []).map((s) => ({ type: "signal.style", source: `mcp:${client}`, payload: { ...s, basis: "observed" }, provenance: { kind: "mcp", client } })),
|
|
87
|
+
];
|
|
88
|
+
if (!events.length)
|
|
89
|
+
return text("Nothing to track — pass topics and/or style signals.");
|
|
82
90
|
await daemonPost("/events", events);
|
|
83
|
-
|
|
91
|
+
const parts = [];
|
|
92
|
+
if (topics?.length)
|
|
93
|
+
parts.push(`${topics.length} topic(s): ${topics.map((t) => t.topic).join(", ")}`);
|
|
94
|
+
if (style?.length)
|
|
95
|
+
parts.push(`${style.length} style signal(s)`);
|
|
96
|
+
return text(`Recorded ${parts.join(" · ")}.`);
|
|
84
97
|
}));
|
|
85
98
|
// ── persnally_context — read path (the Phase 2 core) ────────
|
|
86
99
|
server.tool("persnally_context", `Get the user's personal context: who they are, what they're working on, and their current interests.
|
|
@@ -91,11 +104,12 @@ Call this at the START of a conversation (or when personalization would improve
|
|
|
91
104
|
}, async ({ detail, purpose }) => guarded(async () => {
|
|
92
105
|
logEvent("tool_call", { tool: "persnally_context", detail });
|
|
93
106
|
const client = encodeURIComponent(getClient());
|
|
94
|
-
const [profile, topics] = await Promise.all([
|
|
107
|
+
const [profile, topics, voice] = await Promise.all([
|
|
95
108
|
daemonGet(`/profile?client=${client}`),
|
|
96
109
|
daemonGet(`/topics?limit=${detail === "full" ? 25 : 10}&client=${client}`),
|
|
110
|
+
daemonGet("/voice"),
|
|
97
111
|
]);
|
|
98
|
-
if (!profile && !topics?.length) {
|
|
112
|
+
if (!profile && !topics?.length && !voice?.pack) {
|
|
99
113
|
return text("No context yet — the user hasn't imported data or tracked any signals.");
|
|
100
114
|
}
|
|
101
115
|
let out = "";
|
|
@@ -106,6 +120,11 @@ Call this at the START of a conversation (or when personalization would improve
|
|
|
106
120
|
items += sections.length;
|
|
107
121
|
out += sections.map((s) => `## ${s.title}\n${s.body}`).join("\n\n");
|
|
108
122
|
}
|
|
123
|
+
// The prescriptive layer: how to write/answer so it fits this user, not a generic one.
|
|
124
|
+
if (voice?.pack) {
|
|
125
|
+
out += `${out ? "\n\n" : ""}# How to write for this user\n${voice.pack}`;
|
|
126
|
+
items += voice.items?.length ?? 0;
|
|
127
|
+
}
|
|
109
128
|
if (topics?.length) {
|
|
110
129
|
out += `\n\n# Current interests (decay-weighted)\n`;
|
|
111
130
|
out += topics.map((t) => `- ${t.topic} (${t.category}, ${t.dominant_intent}, weight ${t.weight.toFixed(2)})`).join("\n");
|
|
@@ -130,17 +149,25 @@ server.tool("persnally_interests", `Show the user their own tracked interest pro
|
|
|
130
149
|
return text(out);
|
|
131
150
|
}));
|
|
132
151
|
// ── persnally_forget — privacy control ──────────────────────
|
|
133
|
-
server.tool("persnally_forget", `Hard-delete a topic (and everything derived from it) from the user's context, or wipe all data. Privacy control — always honor it.`, {
|
|
134
|
-
topic: z.string().optional().describe("Topic to remove.
|
|
152
|
+
server.tool("persnally_forget", `Hard-delete a topic or a voice/style pattern (and everything derived from it) from the user's context, or wipe all data. Privacy control — always honor it. A forgotten style pattern stays gone permanently, even if later conversations would otherwise re-observe it.`, {
|
|
153
|
+
topic: z.string().optional().describe("Topic to remove."),
|
|
154
|
+
style: z.object({
|
|
155
|
+
dimension: z.enum(["voice", "convention", "emphasis", "format", "workflow"]),
|
|
156
|
+
pattern: z.string(),
|
|
157
|
+
}).optional().describe("A 'How you write' pattern to remove, e.g. {dimension: 'emphasis', pattern: 'be 100% sure'}."),
|
|
135
158
|
clear_all: z.boolean().optional().default(false),
|
|
136
|
-
}, async ({ topic, clear_all }) => guarded(async () => {
|
|
159
|
+
}, async ({ topic, style, clear_all }) => guarded(async () => {
|
|
137
160
|
logEvent("tool_call", { tool: "persnally_forget", clear_all });
|
|
138
161
|
if (clear_all) {
|
|
139
162
|
await daemonDelete("/events?confirm=all");
|
|
140
163
|
return text("All Persnally data deleted. The store is empty.");
|
|
141
164
|
}
|
|
165
|
+
if (style) {
|
|
166
|
+
const r = await daemonDelete(`/voice/${encodeURIComponent(style.dimension)}/${encodeURIComponent(style.pattern)}`);
|
|
167
|
+
return text(r.deleted ? `Forgot "${style.pattern}" — it won't be re-learned.` : `"${style.pattern}" not found.`);
|
|
168
|
+
}
|
|
142
169
|
if (!topic)
|
|
143
|
-
return text("Name a topic to forget, or set clear_all.");
|
|
170
|
+
return text("Name a topic or a style pattern to forget, or set clear_all.");
|
|
144
171
|
const r = await daemonDelete(`/topics/${encodeURIComponent(topic)}`);
|
|
145
172
|
return text(r.deleted ? `Deleted ${r.deleted} event(s) for "${topic}", including derived data.` : `"${topic}" not found.`);
|
|
146
173
|
}));
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Corpus hygiene. Imported prompts are polluted with pasted data (file paths,
|
|
3
|
+
* URLs, JSON/logs) and injected blocks (task notifications, reminders, command
|
|
4
|
+
* palettes, tool output). Unfiltered, that noise swamps both topic extraction
|
|
5
|
+
* and the voice fingerprint. See docs/CONTEXT_DEPTH.md.
|
|
6
|
+
*/
|
|
7
|
+
/** Remove injected blocks, fenced code, URLs, and filesystem paths. Keeps prose intact. */
|
|
8
|
+
export declare function stripNoise(text: string): string;
|
|
9
|
+
/** Strict: only the prose lines a human actually wrote — for stylometry. */
|
|
10
|
+
export declare function proseLines(text: string): string[];
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Corpus hygiene. Imported prompts are polluted with pasted data (file paths,
|
|
3
|
+
* URLs, JSON/logs) and injected blocks (task notifications, reminders, command
|
|
4
|
+
* palettes, tool output). Unfiltered, that noise swamps both topic extraction
|
|
5
|
+
* and the voice fingerprint. See docs/CONTEXT_DEPTH.md.
|
|
6
|
+
*/
|
|
7
|
+
// A line with at least one of these reads as a sentence, not pasted data.
|
|
8
|
+
const FUNCTION_WORD = /\b(the|a|an|i|to|and|is|it|you|we|that|this|of|for|in|on|do|are|be|can|should|need|want|make|how|what|why|let|so|but|not|just|with|like|now|also|when|if|because|about)\b/;
|
|
9
|
+
/**
 * Remove injected blocks, fenced code, URLs, and filesystem paths. Keeps prose intact.
 *
 * Applied in this order:
 *  1. triple-backtick fenced blocks are dropped;
 *  2. injected blocks (task-notification, system-reminder, local-command…,
 *     command-…) are dropped together with everything inside them;
 *  3. any remaining tag is dropped, keeping the text around it;
 *  4. http(s) URLs and multi-segment /a/b paths are dropped;
 *  5. runs of spaces/tabs and of blank lines are collapsed, then the result is trimmed.
 *
 * @param text raw message text
 * @returns the text with each removed span replaced by a single space
 */
export function stripNoise(text) {
    return text
        .replace(/```[\s\S]*?```/g, " ")
        // The block must be closed by the SAME tag that opened it (\1). Accepting any
        // closing tag would end the block at the first nested </…> and leak the rest
        // of the injected content into the prose. Attributes on the opening tag are allowed.
        .replace(/<((?:task-notification|system-reminder|local-command|command-)[\w-]*)(?:\s[^>]*)?>[\s\S]*?<\/\1\s*>/gi, " ")
        .replace(/<\/?[a-z][^>]*>/gi, " ")
        .replace(/https?:\/\/\S+/g, " ")
        .replace(/(?:[~\w.\-]+)?(?:\/[\w.\-]+){2,}\/?/g, " ") // /a/b style paths
        .replace(/[ \t]{2,}/g, " ")
        .replace(/\n{3,}/g, "\n\n")
        .trim();
}
|
|
21
|
+
/**
 * Strict: only the prose lines a human actually wrote — for stylometry.
 *
 * The text is first cleaned with stripNoise, then split on newlines. A trimmed
 * line is kept only when it has at least two whitespace-separated words, at
 * least 60% of its characters are ASCII letters, and it contains a function word.
 *
 * @param text raw message text
 * @returns the surviving lines, trimmed, in their original order
 */
export function proseLines(text) {
    const kept = [];
    for (const rawLine of stripNoise(text).split("\n")) {
        const line = rawLine.trim();
        // A single word (or an empty line) is never a sentence.
        if (line.split(/\s+/).length < 2)
            continue;
        const letterCount = (line.match(/[a-zA-Z]/g) || []).length;
        // Low letter density is the signature of json/logs/ids.
        if (!line.length || letterCount / line.length < 0.6)
            continue;
        if (FUNCTION_WORD.test(" " + line.toLowerCase() + " "))
            kept.push(line);
    }
    return kept;
}
|
package/build/src/setup.js
CHANGED
|
@@ -31,7 +31,12 @@ function zipHasConversations(zipPath) {
|
|
|
31
31
|
return execFileSync("unzip", ["-l", zipPath], { encoding: "utf-8", stdio: ["ignore", "pipe", "ignore"] })
|
|
32
32
|
.includes("conversations.json");
|
|
33
33
|
}
|
|
34
|
-
catch {
|
|
34
|
+
catch (e) {
|
|
35
|
+
// Only reached on a genuine read failure (unzip missing, corrupt archive,
|
|
36
|
+
// permission denied) — an ordinary non-matching zip never throws here, so
|
|
37
|
+
// this can't spam on unrelated Downloads clutter. Surface it: a real export
|
|
38
|
+
// failing silently is the worst onboarding failure mode there is.
|
|
39
|
+
console.error(`persnally: couldn't read ${zipPath} (${e instanceof Error ? e.message : e}) — skipping`);
|
|
35
40
|
return false;
|
|
36
41
|
}
|
|
37
42
|
}
|
package/build/src/store.d.ts
CHANGED
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
* Single source of truth per docs/EVENT_SCHEMA.md; views can always be re-derived.
|
|
4
4
|
*/
|
|
5
5
|
import { type PersnallyEvent } from "./events.js";
|
|
6
|
+
import { type StyleSignal } from "./stylometry.js";
|
|
6
7
|
export declare const DEFAULT_DB_PATH: string;
|
|
7
8
|
export interface QueryOpts {
|
|
8
9
|
type?: string;
|
|
@@ -53,6 +54,28 @@ export declare class EventStore {
|
|
|
53
54
|
rebuild(now?: number): void;
|
|
54
55
|
saveProfile(p: StoredProfile): void;
|
|
55
56
|
getProfile(): StoredProfile | null;
|
|
57
|
+
/** Logical key for one style pattern — stable across re-imports/re-observations. */
|
|
58
|
+
private styleKey;
|
|
59
|
+
/** Patterns the user has explicitly forgotten — a delete correction tombstones the key permanently. */
|
|
60
|
+
private forgottenStyleKeys;
|
|
61
|
+
/** The voice/convention profile — style signals deduped by pattern (newest wins), richest first, forgotten patterns excluded. */
|
|
62
|
+
voice(): {
|
|
63
|
+
pack: string;
|
|
64
|
+
items: StyleSignal[];
|
|
65
|
+
};
|
|
66
|
+
/**
|
|
67
|
+
* Hard-deletes a style pattern's events and writes a delete correction so it
|
|
68
|
+
* stays gone even if stylometry or live capture re-derives it later — the
|
|
69
|
+
* "deletable for real" promise extended to the voice layer.
|
|
70
|
+
*/
|
|
71
|
+
forgetStyle(dimension: string, pattern: string): number;
|
|
72
|
+
/** Drops style signals of one basis so a deterministic re-run replaces them (live `observed`/`correction` signals are kept). */
|
|
73
|
+
clearStyleByBasis(basis: string): number;
|
|
74
|
+
/**
|
|
75
|
+
* Consolidation distill: bounds the stored style backlog so live capture
|
|
76
|
+
* never grows unbounded. Keeps the richest signal per pattern, capped overall.
|
|
77
|
+
*/
|
|
78
|
+
pruneStyle(maxTotal?: number): number;
|
|
56
79
|
/** Hard-deletes matching topic events plus derived events referencing them, then rebuilds. */
|
|
57
80
|
forgetTopic(topic: string): number;
|
|
58
81
|
/** Removes every event from one import batch — a bad import is fully reversible. */
|
package/build/src/store.js
CHANGED
|
@@ -6,8 +6,9 @@ import Database from "better-sqlite3";
|
|
|
6
6
|
import { mkdirSync } from "node:fs";
|
|
7
7
|
import { dirname, join } from "node:path";
|
|
8
8
|
import { topicWeight } from "./decay.js";
|
|
9
|
-
import { normalizeTopic, validateEvent } from "./events.js";
|
|
9
|
+
import { newEvent, normalizeTopic, validateEvent } from "./events.js";
|
|
10
10
|
import { DATA_DIR } from "./paths.js";
|
|
11
|
+
import { assemblePack } from "./stylometry.js";
|
|
11
12
|
const VIEW_SCHEMA_VERSION = 2;
|
|
12
13
|
export const DEFAULT_DB_PATH = join(DATA_DIR, "persnally.db");
|
|
13
14
|
export class EventStore {
|
|
@@ -195,6 +196,81 @@ export class EventStore {
|
|
|
195
196
|
const row = this.db.prepare("SELECT * FROM view_profile WHERE id = 1").get();
|
|
196
197
|
return row ? { ...row, sections: JSON.parse(row.sections) } : null;
|
|
197
198
|
}
|
|
199
|
+
/** Logical key for one style pattern — stable across re-imports/re-observations. */
|
|
200
|
+
styleKey(dimension, pattern) {
|
|
201
|
+
return `style:${dimension}|${pattern.toLowerCase()}`;
|
|
202
|
+
}
|
|
203
|
+
/** Patterns the user has explicitly forgotten — a delete correction tombstones the key permanently. */
|
|
204
|
+
forgottenStyleKeys() {
|
|
205
|
+
const forgotten = new Set();
|
|
206
|
+
for (const e of this.query({ type: "user.correction", limit: 1_000_000 })) {
|
|
207
|
+
const p = e.payload;
|
|
208
|
+
if (p.action === "delete" && p.target_id.startsWith("style:"))
|
|
209
|
+
forgotten.add(p.target_id);
|
|
210
|
+
}
|
|
211
|
+
return forgotten;
|
|
212
|
+
}
|
|
213
|
+
/** The voice/convention profile — style signals deduped by pattern (newest wins), richest first, forgotten patterns excluded. */
|
|
214
|
+
voice() {
|
|
215
|
+
const forgotten = this.forgottenStyleKeys();
|
|
216
|
+
const byPattern = new Map();
|
|
217
|
+
// query() returns ts DESC, so the first occurrence of a pattern is the most recent.
|
|
218
|
+
for (const e of this.query({ type: "signal.style", limit: 1_000_000 })) {
|
|
219
|
+
const p = e.payload;
|
|
220
|
+
const key = this.styleKey(p.dimension, p.pattern);
|
|
221
|
+
if (forgotten.has(key) || byPattern.has(key))
|
|
222
|
+
continue;
|
|
223
|
+
byPattern.set(key, p);
|
|
224
|
+
}
|
|
225
|
+
// Cap the served set: live `observed` signals accrue over time, so bound it
|
|
226
|
+
// to the richest few (consolidation prunes the stored backlog separately).
|
|
227
|
+
const items = [...byPattern.values()].sort((a, b) => b.confidence - a.confidence).slice(0, 28);
|
|
228
|
+
return { pack: assemblePack(items), items };
|
|
229
|
+
}
|
|
230
|
+
/**
|
|
231
|
+
* Hard-deletes a style pattern's events and writes a delete correction so it
|
|
232
|
+
* stays gone even if stylometry or live capture re-derives it later — the
|
|
233
|
+
* "deletable for real" promise extended to the voice layer.
|
|
234
|
+
*/
|
|
235
|
+
forgetStyle(dimension, pattern) {
|
|
236
|
+
const key = this.styleKey(dimension, pattern);
|
|
237
|
+
const candidates = this.query({ type: "signal.style", limit: 1_000_000 }).filter((e) => this.styleKey(e.payload.dimension, e.payload.pattern) === key);
|
|
238
|
+
const del = this.db.prepare("DELETE FROM events WHERE id = ?");
|
|
239
|
+
const run = this.db.transaction((toDelete) => { for (const id of toDelete)
|
|
240
|
+
del.run(id); });
|
|
241
|
+
run(candidates.map((e) => e.id));
|
|
242
|
+
this.append([newEvent("user.correction", "dashboard", { target_id: key, action: "delete", reason: "" }, { kind: "local", surface: "dashboard" })]);
|
|
243
|
+
return candidates.length;
|
|
244
|
+
}
|
|
245
|
+
/** Drops style signals of one basis so a deterministic re-run replaces them (live `observed`/`correction` signals are kept). */
|
|
246
|
+
clearStyleByBasis(basis) {
|
|
247
|
+
return this.db
|
|
248
|
+
.prepare("DELETE FROM events WHERE type = 'signal.style' AND json_extract(payload, '$.basis') = ?")
|
|
249
|
+
.run(basis).changes;
|
|
250
|
+
}
|
|
251
|
+
/**
|
|
252
|
+
* Consolidation distill: bounds the stored style backlog so live capture
|
|
253
|
+
* never grows unbounded. Keeps the richest signal per pattern, capped overall.
|
|
254
|
+
*/
|
|
255
|
+
pruneStyle(maxTotal = 80) {
|
|
256
|
+
const byPattern = new Map();
|
|
257
|
+
for (const e of this.query({ type: "signal.style", limit: 1_000_000 })) {
|
|
258
|
+
const p = e.payload;
|
|
259
|
+
const key = this.styleKey(p.dimension, p.pattern);
|
|
260
|
+
const existing = byPattern.get(key);
|
|
261
|
+
if (!existing || existing.payload.confidence < p.confidence)
|
|
262
|
+
byPattern.set(key, e);
|
|
263
|
+
}
|
|
264
|
+
const ranked = [...byPattern.entries()].sort((a, b) => b[1].payload.confidence - a[1].payload.confidence);
|
|
265
|
+
const keepIds = new Set(ranked.slice(0, maxTotal).map(([, e]) => e.id));
|
|
266
|
+
const all = this.query({ type: "signal.style", limit: 1_000_000 });
|
|
267
|
+
const toDelete = all.filter((e) => !keepIds.has(e.id)).map((e) => e.id); // drop weaker duplicates + overflow
|
|
268
|
+
const del = this.db.prepare("DELETE FROM events WHERE id = ?");
|
|
269
|
+
const run = this.db.transaction((ids) => { for (const id of ids)
|
|
270
|
+
del.run(id); });
|
|
271
|
+
run(toDelete);
|
|
272
|
+
return toDelete.length;
|
|
273
|
+
}
|
|
198
274
|
/** Hard-deletes matching topic events plus derived events referencing them, then rebuilds. */
|
|
199
275
|
forgetTopic(topic) {
|
|
200
276
|
const key = normalizeTopic(topic);
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deterministic voice fingerprint — no LLM, no tokens, nothing leaves the machine.
|
|
3
|
+
* Turns the user's own prose (already noise-filtered via prose.ts) into structured
|
|
4
|
+
* signal.style payloads + a prescriptive "voice" pack. See docs/CONTEXT_DEPTH.md.
|
|
5
|
+
*/
|
|
6
|
+
import { z } from "zod";
|
|
7
|
+
import { PAYLOAD_SCHEMAS } from "./events.js";
|
|
8
|
+
export type StyleSignal = z.infer<(typeof PAYLOAD_SCHEMAS)["signal.style"]>;
|
|
9
|
+
export interface VoiceProfile {
|
|
10
|
+
signals: StyleSignal[];
|
|
11
|
+
words: {
|
|
12
|
+
word: string;
|
|
13
|
+
count: number;
|
|
14
|
+
}[];
|
|
15
|
+
pack: string;
|
|
16
|
+
prompts: number;
|
|
17
|
+
}
|
|
18
|
+
/** Compute a voice profile from prose messages (each may be multi-line). */
|
|
19
|
+
export declare function analyzeVoice(messages: string[]): VoiceProfile;
|
|
20
|
+
/** Build the system-prompt-ready "voice" line from style signals (shared by import + serving). */
|
|
21
|
+
export declare function assemblePack(signals: StyleSignal[]): string;
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Deterministic voice fingerprint — no LLM, no tokens, nothing leaves the machine.
|
|
3
|
+
* Turns the user's own prose (already noise-filtered via prose.ts) into structured
|
|
4
|
+
* signal.style payloads + a prescriptive "voice" pack. See docs/CONTEXT_DEPTH.md.
|
|
5
|
+
*/
|
|
6
|
+
// Stop words. Contractions are listed WITHOUT their apostrophe ("im", "youre", "lets").
const STOP = new Set(("a an the and or but if then so of to in on for with at by from as is are was were be been being this that these those it its i you he she we they me my your our their them us do does did done have has had having will would can could should may might must not no yes what which who when where why how all any both each few more most other some such only own same than too very just about into over after before above below up down out off again once here there im ive youre were theyre lets")
    .split(/\s+/));
// Verbs that mark a sentence as a command when they open it.
const DIRECTIVE = new Set("make fix add remove create give check use keep build write update ensure confirm let lets do run show change implement refactor delete set move find get take generate review test verify explain tell help put start stop send pull push merge commit"
    .split(" "));
// Substrings that mark a sentence as hedged.
const HEDGE = ["maybe", "i think", "probably", "perhaps", "kind of", "sort of", "i guess", "might be", "not sure", "i feel"];
const EMOJI = /\p{Extended_Pictographic}/gu;
/**
 * Lower-cases `s` and returns its word tokens (letters/digits, with inner
 * apostrophes kept so contractions stay one token). Typographic apostrophes
 * are folded to ASCII first; otherwise "don’t" would split into "don" + "t".
 */
const tokenize = (s) => s.toLowerCase().replace(/[\u2018\u2019]/g, "'").match(/[a-z0-9][a-z0-9']*/g) || [];
/** Median of a numeric list; 0 for an empty list. The input is not mutated. */
const median = (xs) => {
    if (!xs.length)
        return 0;
    const s = [...xs].sort((a, b) => a - b), m = s.length >> 1;
    return s.length % 2 ? s[m] : (s[m - 1] + s[m]) / 2;
};
/**
 * True when every space-separated word of the n-gram `g` is a stop word or a
 * bare number. Apostrophes are stripped before the STOP lookup because tokens
 * keep them ("you're") while STOP spells contractions without ("youre").
 */
const allStop = (g) => g.split(" ").every((w) => STOP.has(w.replace(/'/g, "")) || /^\d+$/.test(w));
|
|
20
|
+
/**
 * Compute a voice profile from prose messages (each may be multi-line).
 *
 * Purely lexical: counts words, 3- and 4-word phrases and per-sentence
 * markers, then turns the resulting rates into style payloads whose basis is
 * "stylometry".
 *
 * @param messages prose strings to analyse
 * @returns `signals` (style payloads), `words` (up to 18 frequent non-stop
 *   words with their counts), `pack` (assemblePack applied to the signals) and
 *   `prompts` (the number of messages; 0 for an empty input)
 */
export function analyzeVoice(messages) {
    if (!messages.length)
        return { signals: [], words: [], pack: "", prompts: 0 };
    const uni = new Map(), tri = new Map(), quad = new Map();
    const sentLens = [];
    let sent = 0, dir = 0, hedge = 0, emoji = 0, lowerI = 0, upperI = 0, please = 0, bulletLines = 0;
    // Tokens keep their apostrophe ("you're", "let's") while the STOP and
    // DIRECTIVE lists spell contractions without one, so strip it for lookups.
    const bare = (w) => w.replace(/'/g, "");
    // Word-bounded so "please." / "please," count and "thanksgiving" does not.
    const polite = /\b(?:please|thanks|thank you)\b/;
    for (const msg of messages) {
        emoji += (msg.match(EMOJI) || []).length;
        lowerI += (msg.match(/(?:^|\s)i(?:'|\s|$)/g) || []).length;
        upperI += (msg.match(/(?:^|\s)I(?:'|\s|$)/g) || []).length;
        for (const ln of msg.split("\n"))
            if (/^\s*[-*•]\s/.test(ln))
                bulletLines++;
        const words = tokenize(msg);
        // Single-word frequencies: non-stop, at least 4 characters, not a bare number.
        for (const w of words) {
            if (!STOP.has(bare(w)) && w.length >= 4 && !/^\d+$/.test(w))
                uni.set(w, (uni.get(w) || 0) + 1);
        }
        for (let i = 0; i < words.length - 2; i++) {
            const g = words.slice(i, i + 3).join(" ");
            tri.set(g, (tri.get(g) || 0) + 1);
        }
        for (let i = 0; i < words.length - 3; i++) {
            const g = words.slice(i, i + 4).join(" ");
            quad.set(g, (quad.get(g) || 0) + 1);
        }
        // Sentences: runs of non-terminator characters plus any trailing . ! ?
        for (const raw of msg.match(/[^.!?\n]+[.!?]*/g) || []) {
            const s = raw.trim();
            if (!s)
                continue;
            sent++;
            const sw = tokenize(s);
            if (sw.length)
                sentLens.push(sw.length);
            const low = " " + s.toLowerCase() + " ";
            if (HEDGE.some((h) => low.includes(h)))
                hedge++;
            if (sw[0] && DIRECTIVE.has(bare(sw[0])))
                dir++;
            if (polite.test(low))
                please++;
        }
    }
    if (!sent)
        return { signals: [], words: [], pack: "", prompts: messages.length };
    // Minimum repeat count for a word or phrase to be reported: 1% of the messages, never below 3.
    const minP = Math.max(3, Math.round(messages.length * 0.01));
    const rate = (n) => n / sent;
    // distinctive repeated phrases — rank by frequency (tiebreak longer); collapse
    // overlapping windows of the same phrase by shared-token overlap, not just substring.
    const phrases = [];
    const keptTokens = [];
    for (const [g, c] of [...quad.entries(), ...tri.entries()]
        .filter(([gram, count]) => count >= minP && !allStop(gram))
        .sort((a, b) => b[1] - a[1] || b[0].length - a[0].length)) {
        const gt = g.split(" ");
        if (keptTokens.some((k) => gt.filter((w) => k.has(w)).length >= 2))
            continue; // same phrase, different window
        phrases.push({ phrase: g, count: c });
        keptTokens.push(new Set(gt));
        if (phrases.length >= 8)
            break;
    }
    const signals = [];
    const med = median(sentLens);
    // Every emitted signal has basis "stylometry" and a confidence rounded to two decimals.
    const add = (dimension, pattern, polarity, confidence, evidence) => signals.push({ dimension, pattern, polarity, confidence: Math.round(confidence * 100) / 100, evidence, basis: "stylometry" });
    // tone constants
    if (med <= 11)
        add("voice", "terse — short, declarative sentences", "does", 0.85, `median ${med} words/sentence`);
    else if (med >= 18)
        add("voice", "writes in long, detailed sentences", "does", 0.8, `median ${med} words/sentence`);
    if (rate(dir) > 0.15)
        add("voice", "leads with imperatives, minimal preamble", "does", 0.75, `${Math.round(rate(dir) * 100)}% of sentences open with a command verb`);
    if (rate(hedge) < 0.05)
        add("voice", "states things flatly; rarely hedges", "does", 0.8, `hedging in ${Math.round(rate(hedge) * 100)}% of sentences`);
    if (emoji / messages.length < 0.02)
        add("format", "no emoji", "avoids", 0.7, `${emoji} emoji across ${messages.length} prompts`);
    if (lowerI > upperI * 1.3)
        add("format", "casual register — lowercases “i”", "does", 0.7, `“i” ${lowerI}× vs “I” ${upperI}×`);
    if (please < messages.length * 0.05)
        add("voice", "skips pleasantries", "does", 0.6, `${please} please/thanks across ${messages.length} prompts`);
    if (bulletLines > messages.length * 0.25)
        add("format", "structures answers with bullet points", "prefers", 0.65, `${bulletLines} bulleted lines`);
    // recurring phrasing → emphasis (these tend to be the user's repeated instructions/values)
    for (const { phrase, count } of phrases)
        add("emphasis", phrase, "insists", Math.min(0.9, 0.5 + count / (minP * 6)), `${count}×`);
    const words = [...uni.entries()].filter(([, c]) => c >= minP).sort((a, b) => b[1] - a[1]).slice(0, 18).map(([word, count]) => ({ word, count }));
    return { signals, words, pack: assemblePack(signals), prompts: messages.length };
}
|
|
114
|
+
/**
 * Build the system-prompt-ready "voice" line from style signals (shared by import + serving).
 *
 * Patterns of every dimension except "emphasis" are listed as-is, in input
 * order. "emphasis" patterns are quoted and appended as one trailing
 * "recurring phrasing" clause holding at most the first five of them.
 *
 * @param signals style payloads to render
 * @returns the assembled sentence, or "" when there are no signals
 */
export function assemblePack(signals) {
    const clauses = [];
    const quoted = [];
    for (const signal of signals) {
        if (signal.dimension === "emphasis")
            quoted.push(`“${signal.pattern}”`);
        else
            clauses.push(signal.pattern);
    }
    if (clauses.length === 0 && quoted.length === 0)
        return "";
    if (quoted.length > 0)
        clauses.push(`recurring phrasing: ${quoted.slice(0, 5).join(", ")}`);
    return `Write like this user: ${clauses.join("; ")}.`;
}
|