pi-memory-stone 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +60 -6
- package/package.json +16 -3
- package/skills/pi-memory-stone/SKILL.md +101 -0
- package/src/commands/index.ts +167 -2
- package/src/db/index.ts +29 -3
- package/src/index.ts +79 -29
- package/src/portable/index.ts +18 -5
- package/src/privacy/index.ts +19 -3
- package/src/retrieval/index.ts +12 -4
- package/src/session-state/index.ts +2 -1
- package/src/tools/index.ts +17 -12
- package/src/vault/capture.ts +268 -0
- package/src/vault/extract.ts +259 -0
- package/src/vault/fetch.ts +155 -0
- package/src/vault/index.ts +306 -0
- package/src/vault/intent.ts +37 -0
- package/src/vault/markdown.ts +120 -0
- package/src/vault/paths.ts +44 -0
- package/src/vault/quality.ts +65 -0
- package/src/vault/url-resolvers.ts +113 -0
package/src/index.ts
CHANGED
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
* - Deterministic turn_summary and file_activity capture on agent_end
|
|
10
10
|
* - FTS5 search
|
|
11
11
|
* - /memory-status, /memory-search, /memory-open, /memory-inject, /memory-last commands
|
|
12
|
+
* - /memory-vault-* commands and natural-language URL capture
|
|
12
13
|
* - memory_search, memory_open, memory_remember, memory_forget tools
|
|
13
14
|
* - Conservative same-project before_agent_start injection
|
|
14
15
|
*/
|
|
@@ -21,6 +22,8 @@ import { retrieve, buildInjectionPacket, formatInjectionForLlm } from "./retriev
|
|
|
21
22
|
import { getProjectId, getConfig, clearProjectCache } from "./config/index.js";
|
|
22
23
|
import { closeDb, getRecord, insertInjection } from "./db/index.js";
|
|
23
24
|
import { getMemorySessionState, manualRecordsToRankedResults } from "./session-state/index.js";
|
|
25
|
+
import { captureUrlToVault } from "./vault/capture.js";
|
|
26
|
+
import { parseVaultCaptureIntent } from "./vault/intent.js";
|
|
24
27
|
import { createHash } from "node:crypto";
|
|
25
28
|
|
|
26
29
|
// ─── Session-scoped state ───────────────────────────────────────────
|
|
@@ -31,6 +34,37 @@ const injectedRefsThisSession: Set<string> = new Set();
|
|
|
31
34
|
/** Whether memory injection is temporarily disabled for this session */
|
|
32
35
|
let sessionEnabled = true;
|
|
33
36
|
|
|
37
|
+
/** Upper bound on any single externally-derived fragment echoed into the capture notice. */
const MAX_NOTICE_FRAGMENT_CHARS = 300;

/**
 * Flattens text that originated outside the extension (page titles, quality
 * warnings, error messages) into one bounded line.
 *
 * The notice produced by `maybeCaptureVaultUrl` is embedded in the system
 * prompt between delimiter lines by `vaultCaptureReturn`, so line breaks in a
 * fetched page title must not be able to start a new line that imitates a
 * delimiter or an instruction paragraph.
 *
 * @param value Raw text to flatten.
 * @param maxChars Maximum length of the returned string (ellipsis included).
 * @returns The text with control characters and whitespace runs collapsed to
 *   single spaces, trimmed, and truncated with a trailing `…` when too long.
 */
function flattenNoticeFragment(value: string, maxChars: number = MAX_NOTICE_FRAGMENT_CHARS): string {
  const flattened = value
    .replace(/[\u0000-\u001f\u007f-\u009f\u2028\u2029]+/g, " ")
    .replace(/\s+/g, " ")
    .trim();
  if (flattened.length <= maxChars) return flattened;
  return `${flattened.slice(0, Math.max(0, maxChars - 1))}…`;
}

/**
 * Captures a URL into the memory vault when the prompt expresses that intent.
 *
 * @param prompt The user's prompt, inspected by `parseVaultCaptureIntent`.
 * @param projectId Current project id, or `null` when there is none.
 * @param cwd Working directory forwarded to the vault capture.
 * @param signal Optional abort signal forwarded to the capture.
 * @returns `null` when the prompt carries no capture intent; otherwise a
 *   human-readable notice describing either the captured page or the failure.
 *   Capture failures are logged and reported in the returned text, not thrown.
 */
async function maybeCaptureVaultUrl(
  prompt: string,
  projectId: string | null,
  cwd: string,
  signal?: AbortSignal,
): Promise<string | null> {
  const intent = parseVaultCaptureIntent(prompt);
  if (!intent) return null;

  try {
    const result = await captureUrlToVault(intent.scope, projectId, cwd, intent.url, { signal });
    // The title and warnings derive from the fetched page, so they are untrusted here.
    const title = flattenNoticeFragment(result.title) || "(untitled)";
    const warningList = result.warnings
      .map((warning) => flattenNoticeFragment(warning))
      .filter((warning) => warning.length > 0);
    const warnings = warningList.length > 0 ? `\nWarnings: ${warningList.join("; ")}` : "";
    return `Captured web page into ${intent.scope} memory vault${result.initialized ? " (initialized vault)" : ""}: ${title}\nQuality: ${result.quality} (${result.qualityScore})${warnings}\nPage: ${result.pagePath}\nSource packet: ${result.sourcePacketPath}`;
  } catch (err) {
    // Log the full error object; only a flattened, bounded message reaches the notice.
    const message = flattenNoticeFragment(err instanceof Error ? err.message : String(err), 500);
    console.error("[pi-memory-stone] vault URL capture failed:", err);
    return `Memory vault URL capture failed: ${message}`;
  }
}
|
|
56
|
+
|
|
57
|
+
/**
 * Builds the `before_agent_start` result for a handled vault capture.
 *
 * @param systemPrompt System prompt to extend (may already contain memory context).
 * @param notice Capture notice, used as the displayed message content and
 *   repeated inside a delimited section appended to the system prompt.
 * @returns An object with a displayed `memory-vault-capture` message and the
 *   extended system prompt.
 */
function vaultCaptureReturn(systemPrompt: string, notice: string) {
  const captureSection = [
    "--- Memory Vault Capture ---",
    notice,
    "The user's vault capture request has already been handled by pi-memory-stone. Briefly confirm the result; do not fetch the same URL again unless the user asks.",
    "--- End Memory Vault Capture ---",
  ].join("\n");

  const message = {
    customType: "memory-vault-capture",
    content: notice,
    display: true,
  };

  return {
    message,
    systemPrompt: `${systemPrompt}\n\n${captureSection}`,
  };
}
|
|
67
|
+
|
|
34
68
|
// ─── Extension entry point ─────────────────────────────────────────
|
|
35
69
|
|
|
36
70
|
export default function (pi: ExtensionAPI) {
|
|
@@ -69,18 +103,24 @@ export default function (pi: ExtensionAPI) {
|
|
|
69
103
|
|
|
70
104
|
pi.on("before_agent_start", async (event, ctx) => {
|
|
71
105
|
try {
|
|
106
|
+
const prompt = event.prompt || "";
|
|
107
|
+
const projectId = getProjectId(ctx.cwd);
|
|
108
|
+
const vaultCaptureNotice = await maybeCaptureVaultUrl(prompt, projectId, ctx.cwd, ctx.signal);
|
|
109
|
+
|
|
72
110
|
// Check if memory is enabled
|
|
73
111
|
const config = getConfig(ctx.cwd);
|
|
74
|
-
if (!config.enabled)
|
|
112
|
+
if (!config.enabled) {
|
|
113
|
+
return vaultCaptureNotice ? vaultCaptureReturn(event.systemPrompt || "", vaultCaptureNotice) : undefined;
|
|
114
|
+
}
|
|
75
115
|
|
|
76
116
|
const sessionState = getMemorySessionState(ctx.sessionManager.getBranch());
|
|
77
117
|
sessionEnabled = sessionState.enabled;
|
|
78
118
|
|
|
79
|
-
if (!sessionEnabled)
|
|
119
|
+
if (!sessionEnabled) {
|
|
120
|
+
return vaultCaptureNotice ? vaultCaptureReturn(event.systemPrompt || "", vaultCaptureNotice) : undefined;
|
|
121
|
+
}
|
|
80
122
|
|
|
81
|
-
const prompt = event.prompt || "";
|
|
82
123
|
const promptHash = createHash("sha256").update(prompt).digest("hex").slice(0, 12);
|
|
83
|
-
const projectId = getProjectId(ctx.cwd);
|
|
84
124
|
const injectionMode = sessionState.injectionMode ?? config.injectionMode;
|
|
85
125
|
|
|
86
126
|
const manualRecords = sessionState.manualRefs
|
|
@@ -103,10 +143,11 @@ export default function (pi: ExtensionAPI) {
|
|
|
103
143
|
}
|
|
104
144
|
|
|
105
145
|
const selectedResults = [...manualResults, ...autoResults];
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
146
|
+
let formatted: string | null = null;
|
|
147
|
+
if (selectedResults.length > 0) {
|
|
148
|
+
const packet = buildInjectionPacket(selectedResults);
|
|
149
|
+
formatted = formatInjectionForLlm(packet, config.maxInjectedTokens);
|
|
150
|
+
}
|
|
110
151
|
|
|
111
152
|
// Track only search-selected refs. Manually chosen refs are intentionally
|
|
112
153
|
// injected on every turn until /memory-clear-injected is used.
|
|
@@ -114,27 +155,36 @@ export default function (pi: ExtensionAPI) {
|
|
|
114
155
|
injectedRefsThisSession.add(r.record.id);
|
|
115
156
|
}
|
|
116
157
|
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
158
|
+
if (formatted) {
|
|
159
|
+
insertInjection({
|
|
160
|
+
session_id: ctx.sessionManager.getSessionId(),
|
|
161
|
+
turn_entry_id: ctx.sessionManager.getLeafId() ?? undefined,
|
|
162
|
+
prompt_hash: promptHash,
|
|
163
|
+
injected_refs: selectedResults.map((r) => r.record.id).join(","),
|
|
164
|
+
packet: formatted,
|
|
165
|
+
reasons: selectedResults.map((r) => r.reasons.join(";")).join(" | "),
|
|
166
|
+
});
|
|
167
|
+
}
|
|
168
|
+
|
|
169
|
+
if (!formatted && !vaultCaptureNotice) return;
|
|
170
|
+
|
|
171
|
+
let systemPrompt = event.systemPrompt || "";
|
|
172
|
+
if (formatted) {
|
|
173
|
+
// Inject as a non-context audit custom entry (separate from LLM context)
|
|
174
|
+
// but also as a system prompt addition for the LLM
|
|
175
|
+
systemPrompt += [
|
|
176
|
+
"",
|
|
177
|
+
"--- Memory Stone Context ---",
|
|
178
|
+
formatted,
|
|
179
|
+
"--- End Memory Stone Context ---",
|
|
180
|
+
].join("\n");
|
|
181
|
+
}
|
|
182
|
+
|
|
183
|
+
if (vaultCaptureNotice) {
|
|
184
|
+
return vaultCaptureReturn(systemPrompt, vaultCaptureNotice);
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
return { systemPrompt };
|
|
138
188
|
} catch (err) {
|
|
139
189
|
console.error("[pi-memory-stone] before_agent_start handler error:", err);
|
|
140
190
|
}
|
package/src/portable/index.ts
CHANGED
|
@@ -2,10 +2,11 @@
|
|
|
2
2
|
* Portable export/import/backup helpers for memory records.
|
|
3
3
|
*/
|
|
4
4
|
|
|
5
|
-
import { copyFileSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
5
|
+
import { chmodSync, copyFileSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
6
6
|
import { dirname, isAbsolute, resolve } from "node:path";
|
|
7
|
-
import { getDb, getDbPath, listRecords, upsertRecord, type RecordRow } from "../db/index.js";
|
|
7
|
+
import { getDb, getDbPath, hardenDbFilePermissions, listRecords, upsertRecord, type RecordRow } from "../db/index.js";
|
|
8
8
|
import { SCHEMA_VERSION, RECORD_KINDS, RECORD_SCOPES, RECORD_STATUSES, type RecordKind, type RecordScope, type RecordStatus } from "../db/schema.js";
|
|
9
|
+
import { isSensitiveForGlobalMemory } from "../privacy/index.js";
|
|
9
10
|
|
|
10
11
|
export type ExportFormat = "json" | "md";
|
|
11
12
|
|
|
@@ -68,8 +69,8 @@ export function exportMemory(format: ExportFormat, includeInactive = false): str
|
|
|
68
69
|
/**
 * Writes a memory export to `path` as JSON or Markdown.
 *
 * The parent directory is created with mode 0o700 when missing, and the export
 * file is restricted to mode 0o600.
 *
 * @param path Destination file path.
 * @param format `"json"` for pretty-printed JSON with a trailing newline;
 *   any other format is rendered through `exportMarkdown`.
 * @param includeInactive Forwarded to `buildMemoryExport`.
 * @returns The number of records in the exported payload.
 */
export function writeMemoryExport(path: string, format: ExportFormat, includeInactive = false): number {
  const payload = buildMemoryExport(includeInactive);
  const content = format === "json" ? JSON.stringify(payload, null, 2) + "\n" : exportMarkdown(payload);
  mkdirSync(dirname(path), { recursive: true, mode: 0o700 });
  writeFileSync(path, content, { encoding: "utf8", mode: 0o600 });
  // `mode` above only applies when the file is newly created. When overwriting
  // an existing export, tighten its permissions explicitly, as
  // backupMemoryDatabase does for backups.
  try {
    chmodSync(path, 0o600);
  } catch {
    // Best effort: chmod can fail on filesystems without POSIX permissions.
  }
  return payload.records.length;
}
|
|
75
76
|
|
|
@@ -94,6 +95,14 @@ export function importMemoryJson(raw: string, options: ImportOptions = {}): Impo
|
|
|
94
95
|
|
|
95
96
|
const scope = options.scopeOverride ?? record.scope;
|
|
96
97
|
const projectId = scope === "global" ? null : (options.projectId !== undefined ? options.projectId : record.project_id);
|
|
98
|
+
if (scope === "project" && !projectId) {
|
|
99
|
+
result.skipped += 1;
|
|
100
|
+
continue;
|
|
101
|
+
}
|
|
102
|
+
if (scope === "global" && isSensitiveForGlobalMemory(`${record.text}\n${record.tags ?? ""}`)) {
|
|
103
|
+
result.skipped += 1;
|
|
104
|
+
continue;
|
|
105
|
+
}
|
|
97
106
|
const id = upsertRecord({
|
|
98
107
|
kind: record.kind,
|
|
99
108
|
scope,
|
|
@@ -117,9 +126,13 @@ export function importMemoryJson(raw: string, options: ImportOptions = {}): Impo
|
|
|
117
126
|
}
|
|
118
127
|
|
|
119
128
|
/**
 * Copies the memory database file to `path`.
 *
 * Steps, in order: create the destination directory (mode 0o700), run
 * `PRAGMA wal_checkpoint(TRUNCATE)` on the open database, call
 * `hardenDbFilePermissions()`, copy the database file, then restrict the copy
 * to mode 0o600.
 *
 * @param path Destination path for the backup file.
 */
export function backupMemoryDatabase(path: string): void {
  const backupDir = dirname(path);
  mkdirSync(backupDir, { recursive: true, mode: 0o700 });

  const db = getDb();
  db.exec("PRAGMA wal_checkpoint(TRUNCATE)");
  hardenDbFilePermissions();

  const liveDbPath = getDbPath();
  copyFileSync(liveDbPath, path);

  try {
    chmodSync(path, 0o600);
  } catch {
    // Deliberately ignored: the backup itself succeeded; only the permission
    // tightening failed.
  }
}
|
|
124
137
|
|
|
125
138
|
export function defaultPortablePath(cwd: string, prefix: string, extension: string): string {
|
package/src/privacy/index.ts
CHANGED
|
@@ -27,7 +27,7 @@ const SECRET_PATTERNS: { name: string; regex: RegExp; replacement: SecretReplace
|
|
|
27
27
|
},
|
|
28
28
|
{
|
|
29
29
|
name: "aws-secret",
|
|
30
|
-
regex:
|
|
30
|
+
regex: /\b(?:aws[_-]?)?secret[_-]?access[_-]?key\b\s*[=:]\s*['"]?[A-Za-z0-9/+=]{40,}['"]?/gi,
|
|
31
31
|
replacement: "[REDACTED:aws-secret]",
|
|
32
32
|
},
|
|
33
33
|
{
|
|
@@ -37,12 +37,12 @@ const SECRET_PATTERNS: { name: string; regex: RegExp; replacement: SecretReplace
|
|
|
37
37
|
},
|
|
38
38
|
{
|
|
39
39
|
name: "generic-api-key",
|
|
40
|
-
regex:
|
|
40
|
+
regex: /\b(?:api[_-]?key|apikey|api[_-]?secret|secret[_-]?key|client[_-]?secret|private[_-]?key|access[_-]?key|auth[_-]?key)\b\s*[=:]\s*['"]?[A-Za-z0-9_\-./+=]{16,}['"]?/gi,
|
|
41
41
|
replacement: "[REDACTED:api-key]",
|
|
42
42
|
},
|
|
43
43
|
{
|
|
44
44
|
name: "secret-assignment",
|
|
45
|
-
regex: /\b(?:secret|secret[_-]?key)\b\s*[=:]\s*(?:['"][^'"]+['"]|[^\s'"`]+)/gi,
|
|
45
|
+
regex: /\b(?:secret|secret[_-]?key|client[_-]?secret|app[_-]?secret|webhook[_-]?secret|signing[_-]?secret)\b\s*[=:]\s*(?:['"][^'"]+['"]|[^\s'"`]+)/gi,
|
|
46
46
|
replacement: "[REDACTED:secret]",
|
|
47
47
|
},
|
|
48
48
|
{
|
|
@@ -126,6 +126,22 @@ export function redactSecrets(text: string): string {
|
|
|
126
126
|
return result;
|
|
127
127
|
}
|
|
128
128
|
|
|
129
|
+
/**
 * Decides whether text is too project-specific or secret-bearing to be stored
 * in global (cross-project) memory.
 *
 * @param text Text to inspect (callers in this package pass record text plus tags).
 * @returns `true` when `redactSecrets` would change the text, or when the text
 *   matches any of the path, file-name, host, or implementation-detail patterns
 *   below; `false` otherwise.
 */
export function isSensitiveForGlobalMemory(text: string): boolean {
  // Anything the secret redactor would rewrite is sensitive by definition.
  const redacted = redactSecrets(text);
  if (redacted !== text) return true;

  const projectLocalMarkers: RegExp[] = [
    // Local/absolute/relative filesystem paths and common repo paths.
    /(?:^|\s)(?:~|\.|\.\.|[A-Za-z]:)?[/\\][^\s]+/,
    /\b(?:src|lib|test|tests|packages|apps|docs|config)\/[\w./-]+\b/i,
    /\b[\w.-]+\.(?:ts|tsx|js|jsx|mjs|cjs|json|yaml|yml|toml|env|db|sqlite|pem|key|crt)\b/i,
    // Hostnames and network endpoints.
    /\b(?:localhost|127\.0\.0\.1|0\.0\.0\.0|::1)\b/i,
    /\b[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?(?:\.[a-z0-9](?:[a-z0-9-]{0,61}[a-z0-9])?)+(?::\d{2,5})?\b/i,
    // Implementation/internal detail markers that should stay project-local.
    /\b(?:internal|private|implementation detail|class|function|method|module|endpoint|schema|table|column)\b/i,
  ];

  for (const marker of projectLocalMarkers) {
    if (marker.test(text)) return true;
  }
  return false;
}
|
|
144
|
+
|
|
129
145
|
export function isSensitivePath(path: string, extraPatterns: RegExp[] = []): boolean {
|
|
130
146
|
const allPatterns = [...DEFAULT_SENSITIVE_PATHS, ...extraPatterns];
|
|
131
147
|
|
package/src/retrieval/index.ts
CHANGED
|
@@ -23,6 +23,8 @@ const KIND_BOOST: Record<string, number> = {
|
|
|
23
23
|
// ─── Recency decay ──────────────────────────────────────────────────
|
|
24
24
|
|
|
25
25
|
/** Recency half-life in milliseconds: seven days. */
const RECENCY_HALF_LIFE_MS = 1000 * 60 * 60 * 24 * 7;

/** Largest number of results a single retrieval may return. */
export const MAX_RETRIEVAL_LIMIT = 20;

/** Largest number of search candidates fetched before ranking (ten per allowed result). */
const MAX_CANDIDATE_LIMIT = 10 * MAX_RETRIEVAL_LIMIT;
|
|
26
28
|
|
|
27
29
|
function recencyDecay(createdAt: number): number {
|
|
28
30
|
const age = Date.now() - createdAt;
|
|
@@ -72,7 +74,7 @@ export function rankAndFilter(
|
|
|
72
74
|
// and require explicit cross-project retrieval.
|
|
73
75
|
if (rec.scope === "global") {
|
|
74
76
|
if (!crossProjectEnabled) continue;
|
|
75
|
-
} else if (rec.project_id
|
|
77
|
+
} else if (!rec.project_id || !currentProjectId || rec.project_id !== currentProjectId) {
|
|
76
78
|
continue;
|
|
77
79
|
}
|
|
78
80
|
|
|
@@ -121,6 +123,11 @@ export function rankAndFilter(
|
|
|
121
123
|
|
|
122
124
|
// ─── Full retrieval pipeline ────────────────────────────────────────
|
|
123
125
|
|
|
126
|
+
/**
 * Turns an untrusted limit into an integer within `[1, MAX_RETRIEVAL_LIMIT]`.
 *
 * @param value Requested limit; used only when it is a finite number.
 * @param fallback Limit to use when `value` is unusable. It is validated too:
 *   a fallback that is not a number (or is NaN) is treated as 1, and a
 *   fractional fallback is floored, so the result is always a usable integer.
 * @returns The floored limit, clamped to at least 1 and at most
 *   `MAX_RETRIEVAL_LIMIT`. Never NaN.
 */
export function normalizeRetrievalLimit(value: unknown, fallback: number): number {
  const requested: unknown = typeof value === "number" && Number.isFinite(value) ? value : fallback;
  // Without this guard a NaN/undefined fallback (e.g. from a hand-edited config)
  // would pass straight through Math.min/Math.max and come out as NaN.
  const usable = typeof requested === "number" && !Number.isNaN(requested) ? Math.floor(requested) : 1;
  return Math.max(1, Math.min(MAX_RETRIEVAL_LIMIT, usable));
}
|
|
130
|
+
|
|
124
131
|
export function retrieve(
|
|
125
132
|
userPrompt: string,
|
|
126
133
|
currentProjectId: string | null,
|
|
@@ -133,13 +140,14 @@ export function retrieve(
|
|
|
133
140
|
},
|
|
134
141
|
): RankedResult[] {
|
|
135
142
|
const config = getConfig();
|
|
136
|
-
const limit = opts?.limit
|
|
143
|
+
const limit = normalizeRetrievalLimit(opts?.limit, config.maxInjectedRecords);
|
|
137
144
|
const crossProject = opts?.crossProjectEnabled ?? config.crossProjectEnabled;
|
|
138
145
|
|
|
139
146
|
const query = buildSearchQuery(userPrompt, recentFiles);
|
|
140
147
|
|
|
141
|
-
// Get more candidates than needed (ranking will filter)
|
|
142
|
-
const
|
|
148
|
+
// Get more candidates than needed (ranking will filter), but keep local work bounded.
|
|
149
|
+
const candidateLimit = Math.min(MAX_CANDIDATE_LIMIT, limit * 10);
|
|
150
|
+
const candidates = searchRecordsFts(query, candidateLimit, opts?.kindFilter, opts?.scopeFilter);
|
|
143
151
|
|
|
144
152
|
const ranked = rankAndFilter(candidates, currentProjectId, crossProject);
|
|
145
153
|
|
|
@@ -67,7 +67,8 @@ export function parseRefArgs(args: string): string[] {
|
|
|
67
67
|
}
|
|
68
68
|
|
|
69
69
|
/**
 * Reports whether a record may be shown while working in the given project.
 *
 * @param record Record to check; only `scope` and `project_id` are read.
 * @param currentProjectId Id of the active project, or `null` when unknown.
 * @returns `true` for global-scope records, and for other records only when
 *   both ids are non-empty and equal.
 */
export function isRecordVisibleInProject(record: RecordRow, currentProjectId: string | null): boolean {
  if (record.scope === "global") return true;

  // A record without a project id, or an unknown current project, never matches.
  if (!currentProjectId || !record.project_id) return false;

  return record.project_id === currentProjectId;
}
|
|
72
73
|
|
|
73
74
|
export function manualRecordsToRankedResults(
|
package/src/tools/index.ts
CHANGED
|
@@ -6,8 +6,10 @@ import type { ExtensionAPI } from "@earendil-works/pi-coding-agent";
|
|
|
6
6
|
import { Type } from "typebox";
|
|
7
7
|
import { StringEnum } from "@earendil-works/pi-ai";
|
|
8
8
|
import { getRecord, softForgetRecord, upsertRecord } from "../db/index.js";
|
|
9
|
-
import { retrieve, buildInjectionPacket, formatInjectionForLlm } from "../retrieval/index.js";
|
|
9
|
+
import { retrieve, buildInjectionPacket, formatInjectionForLlm, normalizeRetrievalLimit } from "../retrieval/index.js";
|
|
10
10
|
import { getProjectId, getConfig } from "../config/index.js";
|
|
11
|
+
import { isSensitiveForGlobalMemory } from "../privacy/index.js";
|
|
12
|
+
import { isRecordVisibleInProject } from "../session-state/index.js";
|
|
11
13
|
import type { RecordKind, RecordScope } from "../db/schema.js";
|
|
12
14
|
|
|
13
15
|
export function registerTools(pi: ExtensionAPI): void {
|
|
@@ -37,12 +39,12 @@ export function registerTools(pi: ExtensionAPI): void {
|
|
|
37
39
|
] as const),
|
|
38
40
|
),
|
|
39
41
|
scope: Type.Optional(StringEnum(["project", "global"] as const)),
|
|
40
|
-
limit: Type.Optional(Type.Number({ description: "Max results (default 5)" })),
|
|
42
|
+
limit: Type.Optional(Type.Number({ description: "Max results (default 5, max 20)", minimum: 1, maximum: 20 })),
|
|
41
43
|
}),
|
|
42
44
|
async execute(toolCallId, params, _signal, _onUpdate, ctx) {
|
|
43
45
|
const projectId = getProjectId(ctx.cwd);
|
|
44
46
|
const config = getConfig(ctx.cwd);
|
|
45
|
-
const limit = params.limit
|
|
47
|
+
const limit = normalizeRetrievalLimit(params.limit, 5);
|
|
46
48
|
|
|
47
49
|
const results = retrieve(params.query, projectId, [], {
|
|
48
50
|
limit,
|
|
@@ -102,10 +104,8 @@ export function registerTools(pi: ExtensionAPI): void {
|
|
|
102
104
|
}
|
|
103
105
|
|
|
104
106
|
const currentProjectId = getProjectId(ctx.cwd);
|
|
105
|
-
const visibleInCurrentProject =
|
|
106
|
-
record.scope === "global" || record.project_id === null || record.project_id === currentProjectId;
|
|
107
107
|
|
|
108
|
-
if (record.status !== "active" || !
|
|
108
|
+
if (record.status !== "active" || !isRecordVisibleInProject(record, currentProjectId)) {
|
|
109
109
|
return {
|
|
110
110
|
content: [{ type: "text", text: `Memory record ${params.ref} is not available.` }],
|
|
111
111
|
details: { ref: params.ref, found: false, unavailable: true },
|
|
@@ -168,10 +168,7 @@ export function registerTools(pi: ExtensionAPI): void {
|
|
|
168
168
|
let scope = params.scope ?? "project";
|
|
169
169
|
|
|
170
170
|
// Safety: never allow global for implementation details, paths, etc.
|
|
171
|
-
const isSensitiveForGlobal =
|
|
172
|
-
/\b(?:password|secret|token|key|\.env|localhost|127\.0\.0\.1|internal|private)\b/i.test(
|
|
173
|
-
params.text,
|
|
174
|
-
);
|
|
171
|
+
const isSensitiveForGlobal = isSensitiveForGlobalMemory(`${params.text}\n${params.tags ?? ""}`);
|
|
175
172
|
|
|
176
173
|
const downgradedToProject = isSensitiveForGlobal && scope === "global";
|
|
177
174
|
if (downgradedToProject) {
|
|
@@ -228,14 +225,22 @@ export function registerTools(pi: ExtensionAPI): void {
|
|
|
228
225
|
};
|
|
229
226
|
}
|
|
230
227
|
|
|
228
|
+
const currentProjectId = getProjectId(ctx.cwd);
|
|
229
|
+
if (record.status !== "active" || !isRecordVisibleInProject(record, currentProjectId)) {
|
|
230
|
+
return {
|
|
231
|
+
content: [{ type: "text", text: `Memory record ${params.ref} is not available.` }],
|
|
232
|
+
details: { ref: params.ref, found: false, unavailable: true },
|
|
233
|
+
};
|
|
234
|
+
}
|
|
235
|
+
|
|
231
236
|
if (params.hard) {
|
|
232
237
|
// For hard delete via tool, we require the user to explicitly confirm
|
|
233
|
-
// The tool should note this requires user interaction
|
|
238
|
+
// The tool should note this requires user interaction without leaking record contents.
|
|
234
239
|
return {
|
|
235
240
|
content: [
|
|
236
241
|
{
|
|
237
242
|
type: "text",
|
|
238
|
-
text: `Permanent deletion requires explicit confirmation. Please use /memory-forget ${params.ref} --hard to permanently delete this record
|
|
243
|
+
text: `Permanent deletion requires explicit confirmation. Please use /memory-forget ${params.ref} --hard to permanently delete this record.`,
|
|
239
244
|
},
|
|
240
245
|
],
|
|
241
246
|
details: { ref: params.ref, requiresConfirmation: true },
|
|
@@ -0,0 +1,268 @@
|
|
|
1
|
+
/** URL capture for memory vault source pages. */
|
|
2
|
+
|
|
3
|
+
import { createHash } from "node:crypto";
|
|
4
|
+
import { mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
5
|
+
import { join, relative } from "node:path";
|
|
6
|
+
import { redactSecrets } from "../privacy/index.js";
|
|
7
|
+
import { initVault, getVaultStatus, type VaultRegistry, type VaultRegistryPage } from "./index.js";
|
|
8
|
+
import { sanitizeSlug } from "./markdown.js";
|
|
9
|
+
import { resolveSourcePacketPath, resolveVaultPath, type VaultScope } from "./paths.js";
|
|
10
|
+
import { extractArticle, type ExtractedArticle } from "./extract.js";
|
|
11
|
+
import { assessCaptureQuality, type CaptureQuality, type CaptureQualityReport } from "./quality.js";
|
|
12
|
+
import { fetchCandidate, type CaptureFetchAttempt, type CaptureFetchOptions, type FetchedCandidate } from "./fetch.js";
|
|
13
|
+
import { resolveCaptureTargets, type CaptureCandidate } from "./url-resolvers.js";
|
|
14
|
+
|
|
15
|
+
/** Maximum number of characters of extracted markdown kept for a captured page. */
const MAX_EXTRACTED_CHARS = 200 * 1000;
|
|
16
|
+
|
|
17
|
+
/**
 * Options accepted by `captureUrlToVault`.
 *
 * Currently identical to the fetch options; declared as its own interface so
 * capture-specific options can be added without changing the public name.
 */
export interface CaptureUrlOptions extends CaptureFetchOptions {}
|
|
18
|
+
|
|
19
|
+
/** Outcome of a successful `captureUrlToVault` call. */
export interface CaptureUrlResult {
  /** Root directory of the vault the page was written into. */
  vaultPath: string;
  /** Path of the generated source page under the vault's `sources` folder. */
  pagePath: string;
  /** Directory holding the capture's manifest, metadata, original and extracted files. */
  sourcePacketPath: string;
  /** Extracted page title, or the final URL's hostname when no title was extracted. */
  title: string;
  /** The URL as originally requested. */
  url: string;
  /** The URL the selected fetch ended at. */
  finalUrl: string;
  /** `true` when this call had to initialize the vault first. */
  initialized: boolean;
  /** Quality label of the selected capture. */
  quality: CaptureQuality;
  /** Numeric quality score of the selected capture. */
  qualityScore: number;
  /** Warnings reported by the quality assessment. */
  warnings: string[];
}
|
|
31
|
+
|
|
32
|
+
/**
 * Fetches a URL, extracts its article text and stores it in the memory vault.
 *
 * Writes a source packet (`manifest.json`, `metadata.json`, the redacted
 * original response and `extracted.md`), a generated source page under
 * `sources/`, and an entry in the vault registry. The vault is initialized
 * first when it does not exist yet.
 *
 * @param scope Vault scope to capture into.
 * @param projectId Current project id, or `null`.
 * @param cwd Working directory used to resolve vault paths.
 * @param url URL requested by the user.
 * @param options Fetch options (for example an abort signal).
 * @returns Paths and quality information for the stored capture.
 * @throws When no candidate could be fetched, or a file operation fails.
 */
export async function captureUrlToVault(
  scope: VaultScope,
  projectId: string | null,
  cwd: string,
  url: string,
  options: CaptureUrlOptions = {},
): Promise<CaptureUrlResult> {
  const targets = resolveCaptureTargets(url);
  const vaultPath = resolveVaultPath(scope, projectId, cwd);
  const vaultExisted = getVaultStatus(scope, projectId, cwd).initialized;
  if (!vaultExisted) {
    initVault(scope, projectId, cwd);
  }

  const selected = await fetchAndExtractBest(targets.candidates, options);
  const { fetched, extracted, quality } = selected;
  const sourceUrl = targets.originalUrl;

  // Identity of this capture: a readable slug plus a short hash of the requested URL.
  const title = extracted.title || new URL(fetched.finalUrl).hostname;
  const slug = sanitizeSlug(title).slice(0, 70) || "captured-page";
  const urlDigest = sha256(sourceUrl).slice(0, 8);
  const captureId = `SRC-${new Date().toISOString().slice(0, 10)}-${urlDigest}`;

  const packetPath = resolveSourcePacketPath(scope, projectId, cwd, captureId);
  const packetRelPath = normalizePath(relative(vaultPath, packetPath));
  const sourcePageRelPath = join("sources", `${slug}-${urlDigest}.md`);
  const sourcePagePath = join(vaultPath, sourcePageRelPath);

  const privateDirs = [
    join(packetPath, "original"),
    join(packetPath, "attachments"),
    join(vaultPath, "sources"),
  ];
  for (const dir of privateDirs) {
    mkdirSync(dir, { recursive: true, mode: 0o700 });
  }

  const capturedAt = new Date().toISOString();
  const originalName = originalArtifactName(fetched.contentType, fetched.finalUrl, extracted.extractor);
  // Redact first, then restore markers that were escaped, then cap the size.
  const extractedMarkdown = unescapeRedactionMarkers(redactSecrets(extracted.markdown)).slice(0, MAX_EXTRACTED_CHARS);
  const redactedRaw = redactSecrets(fetched.raw);
  const contentHash = sha256(extractedMarkdown);
  const canonicalUrl = extracted.canonicalUrl ?? fetched.finalUrl;

  const manifest = {
    id: captureId,
    url: sourceUrl,
    canonical_url: canonicalUrl,
    final_url: fetched.finalUrl,
    title,
    byline: extracted.byline,
    site_name: extracted.siteName,
    excerpt: extracted.excerpt,
    published_at: extracted.publishedAt,
    content_type: fetched.contentType,
    captured_at: capturedAt,
    original: `original/${originalName}`,
    extracted: "extracted.md",
    metadata: "metadata.json",
    attempts: selected.attempts,
    extraction: {
      extractor: extracted.extractor,
      strategy: fetched.candidate.strategy,
      candidate_kind: fetched.candidate.kind,
    },
    quality,
    content_hash: contentHash,
  };

  const metadata = {
    title,
    byline: extracted.byline,
    site_name: extracted.siteName,
    excerpt: extracted.excerpt,
    published_at: extracted.publishedAt,
    source_url: sourceUrl,
    canonical_url: canonicalUrl,
    final_url: fetched.finalUrl,
    content_hash: contentHash,
    extractor: extracted.extractor,
    fetch_strategy: fetched.candidate.strategy,
    quality,
  };

  // Every packet file is written owner-read/write only.
  const writePrivate = (filePath: string, content: string): void => {
    writeFileSync(filePath, content, { mode: 0o600 });
  };
  const toJsonDocument = (value: unknown): string => JSON.stringify(value, null, 2) + "\n";

  writePrivate(join(packetPath, "manifest.json"), toJsonDocument(manifest));
  writePrivate(join(packetPath, "metadata.json"), toJsonDocument(metadata));
  writePrivate(join(packetPath, "original", originalName), redactedRaw);
  writePrivate(join(packetPath, "extracted.md"), extractedMarkdown);

  const pageMarkdown = renderSourcePage({
    title,
    url: sourceUrl,
    canonicalUrl,
    capturedAt,
    captureId,
    packetRelPath,
    extractedMarkdown,
    quality,
    warnings: quality.warnings,
  });
  writePrivate(sourcePagePath, pageMarkdown);

  updateRegistry(vaultPath, {
    path: normalizePath(sourcePageRelPath),
    title,
    kind: "web_source",
    source_url: sourceUrl,
    source_packet: packetRelPath,
    content_hash: sha256(pageMarkdown),
    generated: true,
    created_at: capturedAt,
    updated_at: capturedAt,
  });

  return {
    vaultPath,
    pagePath: sourcePagePath,
    sourcePacketPath: packetPath,
    title,
    url: sourceUrl,
    finalUrl: fetched.finalUrl,
    initialized: !vaultExisted,
    quality: quality.quality,
    qualityScore: quality.score,
    warnings: quality.warnings,
  };
}
|
|
148
|
+
|
|
149
|
+
/** One fetched candidate together with its extraction and quality assessment. */
interface ExtractedCandidate {
  /** The fetch result for the candidate. */
  fetched: FetchedCandidate;
  /** Article content extracted from the redacted response. */
  extracted: ExtractedArticle;
  /** Quality report computed for the extracted article. */
  quality: CaptureQualityReport;
  /** Fetch attempts recorded up to the point this candidate was evaluated or returned. */
  attempts: CaptureFetchAttempt[];
}
|
|
155
|
+
|
|
156
|
+
/**
 * Tries the capture candidates in order and picks the best extraction.
 *
 * Returns immediately on the first candidate whose quality is `"good"`;
 * otherwise returns the highest-scoring candidate seen (the earliest one wins
 * ties). Failures of individual candidates are collected, not thrown.
 *
 * @param candidates Candidates to fetch, in priority order.
 * @param options Fetch options passed to every fetch.
 * @returns The selected candidate with the fetch attempts recorded so far.
 * @throws Error listing every candidate failure when nothing could be fetched.
 */
async function fetchAndExtractBest(candidates: CaptureCandidate[], options: CaptureUrlOptions): Promise<ExtractedCandidate> {
  const attemptLog: CaptureFetchAttempt[] = [];
  const failures: string[] = [];
  let bestSoFar: ExtractedCandidate | null = null;

  for (const candidate of candidates) {
    try {
      const fetched = await fetchCandidate(candidate, options);
      attemptLog.push(...fetched.attempts);

      // Extraction only ever sees the redacted response.
      const extracted = extractArticle({
        raw: redactSecrets(fetched.raw),
        contentType: fetched.contentType,
        url: fetched.finalUrl,
        candidateKind: candidate.kind,
      });
      const { title, markdown, extractor } = extracted;
      const quality = assessCaptureQuality({ title, markdown, extractor });

      // Snapshot the attempt log so later candidates do not alter this result.
      const outcome: ExtractedCandidate = { fetched, extracted, quality, attempts: attemptLog.slice() };
      if (bestSoFar === null || outcome.quality.score > bestSoFar.quality.score) {
        bestSoFar = outcome;
      }
      if (quality.quality === "good") {
        return outcome;
      }
    } catch (error) {
      // Some failures carry the fetch attempts made before giving up.
      const carried = (error as Error & { attempts?: CaptureFetchAttempt[] }).attempts;
      if (carried) {
        attemptLog.push(...carried);
      }
      const reason = error instanceof Error ? error.message : String(error);
      failures.push(`${candidate.strategy}: ${reason}`);
    }
  }

  if (bestSoFar !== null) {
    return { ...bestSoFar, attempts: attemptLog };
  }
  throw new Error(`Unable to fetch article. Attempts failed: ${failures.join("; ")}`);
}
|
|
190
|
+
|
|
191
|
+
/**
 * Renders the markdown source page for a captured URL.
 *
 * The page consists of a front-matter block (string values JSON-quoted), a
 * single-line heading, a summary of source details, an optional warnings list
 * and the extracted text.
 *
 * @param input Capture details to render.
 * @returns The page as one newline-joined string ending with a newline.
 */
function renderSourcePage(input: {
  title: string;
  url: string;
  canonicalUrl: string;
  capturedAt: string;
  captureId: string;
  packetRelPath: string;
  extractedMarkdown: string;
  quality: CaptureQualityReport;
  warnings: string[];
}): string {
  const { quality } = input;

  const frontMatter = [
    "---",
    `title: ${JSON.stringify(input.title)}`,
    "kind: web_source",
    `source_url: ${JSON.stringify(input.url)}`,
    `canonical_url: ${JSON.stringify(input.canonicalUrl)}`,
    `source_packet: ${JSON.stringify(input.packetRelPath)}`,
    `captured_at: ${JSON.stringify(input.capturedAt)}`,
    `capture_id: ${JSON.stringify(input.captureId)}`,
    `quality: ${JSON.stringify(quality.quality)}`,
    `quality_score: ${quality.score}`,
    "generated: true",
    "source: pi-memory-stone",
    "---",
  ];

  // A heading must stay on one line, so line breaks in the title become spaces.
  const headingText = input.title.replace(/[\r\n]+/g, " ").trim();

  const summary = [
    `Source: ${input.url}`,
    `Canonical: ${input.canonicalUrl}`,
    `Captured: ${input.capturedAt}`,
    `Quality: ${quality.quality} (${quality.score})`,
    `Source packet: ${input.captureId} (stored outside vault: ${input.packetRelPath})`,
  ];

  const warningSection: string[] = [];
  if (input.warnings.length > 0) {
    warningSection.push("", "Warnings:");
    for (const warning of input.warnings) {
      warningSection.push(`- ${warning}`);
    }
  }

  const bodyText = input.extractedMarkdown.trim() || "_No text extracted._";

  return [
    ...frontMatter,
    "",
    `# ${headingText}`,
    "",
    ...summary,
    ...warningSection,
    "",
    "## Extracted text",
    "",
    bodyText,
    "",
  ].join("\n");
}
|
|
236
|
+
|
|
237
|
+
/**
 * Adds or replaces a page entry in the vault's `meta/registry.json`.
 *
 * An existing entry with the same `path` is replaced, entries are kept sorted
 * by `path`, and `generated_at` is refreshed. Other registry fields are kept.
 *
 * @param vaultPath Root directory of the vault.
 * @param page Registry entry for the captured source page.
 * @throws When the registry file cannot be read or parsed, or when its root is
 *   not a JSON object.
 */
function updateRegistry(vaultPath: string, page: VaultRegistryPage & {
  source_url: string;
  source_packet: string;
}): void {
  const registryPath = join(vaultPath, "meta", "registry.json");
  const parsed: unknown = JSON.parse(readFileSync(registryPath, "utf8"));
  // The registry is a file on disk, so validate its shape before using it.
  if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
    throw new Error(`Vault registry at ${registryPath} is not a JSON object`);
  }
  const registry = parsed as VaultRegistry;

  // Tolerate a registry without a usable `pages` list instead of crashing.
  const knownPages = Array.isArray(registry.pages) ? registry.pages : [];
  const pages = knownPages.filter((existing) => existing.path !== page.path);
  pages.push(page);
  pages.sort((a, b) => a.path.localeCompare(b.path));

  registry.pages = pages;
  registry.generated_at = new Date().toISOString();
  writeFileSync(registryPath, JSON.stringify(registry, null, 2) + "\n", { mode: 0o600 });
}
|
|
250
|
+
|
|
251
|
+
/**
 * Chooses the file name under which the original response is stored.
 *
 * @param contentType Content type of the response; matched case-insensitively
 *   because media types are case-insensitive.
 * @param finalUrl Final URL of the fetch; its extension is used as a fallback hint.
 * @param extractor Name of the extractor that handled the response.
 * @returns `response.html`, `response.md`, `response.pdf.txt` or `response.txt`.
 */
function originalArtifactName(contentType: string, finalUrl: string, extractor: string): string {
  const mediaType = contentType.toLowerCase();
  const lowerUrl = finalUrl.toLowerCase();

  if (mediaType.includes("html") || extractor.startsWith("html")) return "response.html";

  // The extension must end the URL or be followed by a query or fragment.
  const looksLikeMarkdown = /\.(?:md|markdown|mdx)(?:$|[?#])/.test(lowerUrl);
  if (mediaType.includes("markdown") || looksLikeMarkdown || extractor === "markdown") return "response.md";

  const looksLikePdf = /\.pdf(?:$|[?#])/.test(lowerUrl);
  if (mediaType.includes("pdf") || looksLikePdf) return "response.pdf.txt";

  return "response.txt";
}
|
|
257
|
+
|
|
258
|
+
/**
 * Converts a path to forward-slash form.
 *
 * @param path Path that may contain `/` or `\` separators.
 * @returns The path with every run of separators replaced by a single `/`.
 */
function normalizePath(path: string): string {
  const separatorRun = /[\\/]+/g;
  return path.replace(separatorRun, "/");
}
|
|
261
|
+
|
|
262
|
+
/**
 * Restores redaction markers whose brackets were backslash-escaped.
 *
 * @param markdown Markdown that may contain `\[REDACTED:name\]` sequences.
 * @returns The markdown with each such sequence rewritten to `[REDACTED:name]`;
 *   only lowercase-letter/hyphen names are recognised.
 */
function unescapeRedactionMarkers(markdown: string): string {
  const escapedMarker = /\\\[REDACTED:([a-z-]+)\\\]/g;
  return markdown.replace(escapedMarker, (_whole: string, label: string) => `[REDACTED:${label}]`);
}
|
|
265
|
+
|
|
266
|
+
/**
 * Computes the SHA-256 digest of a string.
 *
 * @param content Text to hash.
 * @returns The digest as a lowercase hexadecimal string.
 */
function sha256(content: string): string {
  const hasher = createHash("sha256");
  hasher.update(content);
  return hasher.digest("hex");
}
|