pi-memory-stone 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,306 @@
1
+ /**
2
+ * Optional Obsidian-compatible knowledge vault layer for pi-memory-stone.
3
+ *
4
+ * SQLite remains the source of truth. Vault pages are generated, reviewable
5
+ * markdown projections of active memory records.
6
+ */
7
+
8
+ import { createHash } from "node:crypto";
9
+ import { existsSync, mkdirSync, readFileSync, readdirSync, statSync, writeFileSync } from "node:fs";
10
+ import { join, relative } from "node:path";
11
+ import { listRecords, type RecordRow } from "../db/index.js";
12
+ import { kindDirectory, recordMarkdown, recordTitle } from "./markdown.js";
13
+ import { resolveVaultPath, type VaultScope } from "./paths.js";
14
+
15
+ export { isVaultScope, parseVaultScope, resolveVaultPath, type VaultScope } from "./paths.js";
16
+ export { kindDirectory, parseTags, recordMarkdown, recordTitle, sanitizeSlug } from "./markdown.js";
17
+
18
+ const VAULT_SCHEMA_VERSION = 1;
19
+
20
/** Outcome of `initVault`. */
export interface VaultInitResult {
  /** Resolved vault root directory. */
  path: string;
  /** True when `WIKI_SCHEMA.md` did not exist under `path` before the call. */
  created: boolean;
}
24
+
25
/** Summary returned by `syncVault`. */
export interface VaultSyncResult {
  /** Resolved vault root directory. */
  path: string;
  /** Number of records selected for this vault. */
  records: number;
  /** Number of record pages whose file content was written because it differed from disk. */
  pagesWritten: number;
  /** Location of the registry file written by the sync. */
  registryPath: string;
}
31
+
32
/** Read-only snapshot produced by `getVaultStatus`. */
export interface VaultStatus {
  /** Resolved vault root directory. */
  path: string;
  /** True when both `WIKI_SCHEMA.md` and `meta/registry.json` exist. */
  initialized: boolean;
  /** True when `meta/registry.json` exists, whether or not it parses. */
  registryExists: boolean;
  /** Count of `.md` files found anywhere under `path`. */
  pageCount: number;
  /** Count of registry entries that carry a `source_record_id`. */
  recordPageCount: number;
  /**
   * The registry's `generated_at` value, or null without a readable registry.
   * Note that the empty registry written at init time also sets `generated_at`.
   */
  lastSyncedAt: string | null;
}
40
+
41
/** Shape of `meta/registry.json`. */
export interface VaultRegistry {
  /** Fixed marker; `readRegistry` rejects files whose `format` differs. */
  format: "pi-memory-stone-vault-registry";
  /** Schema version written from `VAULT_SCHEMA_VERSION`. */
  version: number;
  /** Scope the vault was generated for. */
  scope: VaultScope;
  /** Project id for project-scoped vaults, otherwise null. */
  project_id: string | null;
  /** ISO-8601 timestamp taken when the registry object was built. */
  generated_at: string;
  /** Registered pages; the sync writes them sorted by `path`. */
  pages: VaultRegistryPage[];
}
49
+
50
/** One page entry inside the vault registry. */
export interface VaultRegistryPage {
  /** Vault-relative path using `/` separators. */
  path: string;
  /** Human-readable page title. */
  title: string;
  /** Record kind (for synced record pages this is the record's `kind`). */
  kind: string;
  /**
   * Id of the memory record the page was generated from. Entries without it
   * are carried over untouched by `syncVault`.
   */
  source_record_id?: string;
  /** NOTE(review): not written by the sync code in this module; presumably set by URL capture. Confirm. */
  source_url?: string;
  /** NOTE(review): not written by the sync code in this module; presumably set by URL capture. Confirm. */
  source_packet?: string;
  /** Truncated hex digest of the page content. */
  content_hash: string;
  /** Always true for entries produced by this tool. */
  generated: true;
  /** ISO-8601 creation timestamp. */
  created_at: string;
  /** ISO-8601 last-update timestamp. */
  updated_at: string;
}
62
+
63
/**
 * Creates the vault directory tree and seeds its bootstrap files.
 *
 * Existing files are never overwritten, so calling this on an already
 * initialized vault only fills in whatever is missing.
 *
 * @param scope - Which vault to initialize.
 * @param projectId - Project identifier forwarded to path resolution.
 * @param cwd - Working directory forwarded to path resolution.
 * @returns The vault path and whether `WIKI_SCHEMA.md` was absent beforehand.
 */
export function initVault(scope: VaultScope, projectId: string | null, cwd: string): VaultInitResult {
  const vaultPath = resolveVaultPath(scope, projectId, cwd);
  const schemaFile = join(vaultPath, "WIKI_SCHEMA.md");
  // Sampled before seeding; afterwards the schema file always exists.
  const created = !existsSync(schemaFile);

  ensureVaultDirectories(vaultPath);

  const seeds: Array<[string, string]> = [
    [schemaFile, schemaMarkdown(scope)],
    [join(vaultPath, "index.md"), indexMarkdown(scope, projectId, [])],
    [join(vaultPath, "meta", "registry.json"), JSON.stringify(emptyRegistry(scope, projectId), null, 2) + "\n"],
  ];
  for (const [file, content] of seeds) {
    writeIfMissing(file, content);
  }

  return { path: vaultPath, created };
}
74
+
75
/**
 * Projects the records belonging to a vault into markdown pages, then rewrites
 * the registry and the index.
 *
 * A page file is only rewritten when its rendered content differs from what is
 * on disk. Registry entries without a `source_record_id` are carried over.
 *
 * @param scope - Which vault to sync.
 * @param projectId - Project identifier used for path resolution and record selection.
 * @param cwd - Working directory forwarded to path resolution.
 * @returns Counts for the sync plus the vault and registry paths.
 * @throws Error when the vault has not been initialized.
 */
export function syncVault(scope: VaultScope, projectId: string | null, cwd: string): VaultSyncResult {
  const vaultPath = resolveVaultPath(scope, projectId, cwd);
  if (!isVaultInitialized(vaultPath)) {
    throw new Error(`Vault is not initialized at ${vaultPath}. Run /memory-vault-init first.`);
  }

  ensureVaultDirectories(vaultPath);
  const records = recordsForVault(scope, projectId);
  let pagesWritten = 0;

  const pages = records.map((record): VaultRegistryPage => {
    const kindDir = kindDirectory(record.kind);
    const relativePagePath = join("records", kindDir, `${record.id}.md`);
    const outputPath = join(vaultPath, relativePagePath);
    // Kinds outside the predefined set resolve to a slug directory that may not exist yet.
    mkdirSync(join(vaultPath, "records", kindDir), { recursive: true, mode: 0o700 });

    const content = recordMarkdown(record);
    const contentHash = sha256(content);
    const onDisk = existsSync(outputPath) ? readFileSync(outputPath, "utf8") : null;
    if (onDisk !== content) {
      writeFileSync(outputPath, content, { mode: 0o600 });
      pagesWritten += 1;
    }

    return {
      path: normalizePath(relativePagePath),
      title: recordTitle(record),
      kind: record.kind,
      source_record_id: record.id,
      content_hash: contentHash,
      generated: true,
      created_at: new Date(record.created_at).toISOString(),
      updated_at: new Date(record.updated_at).toISOString(),
    };
  });

  const registryPath = join(vaultPath, "meta", "registry.json");
  const previous = readRegistry(registryPath);
  // Entries not tied to a record are kept; record entries are replaced wholesale.
  const preserved = previous?.pages.filter((page) => !page.source_record_id) ?? [];
  const registry: VaultRegistry = {
    format: "pi-memory-stone-vault-registry",
    version: VAULT_SCHEMA_VERSION,
    scope,
    project_id: scope === "project" ? projectId : null,
    generated_at: new Date().toISOString(),
    pages: [...preserved, ...pages].sort((a, b) => a.path.localeCompare(b.path)),
  };

  writeFileSync(registryPath, JSON.stringify(registry, null, 2) + "\n", { mode: 0o600 });
  writeFileSync(join(vaultPath, "index.md"), indexMarkdown(scope, projectId, pages), { mode: 0o600 });

  return { path: vaultPath, records: records.length, pagesWritten, registryPath };
}
128
+
129
/**
 * Reports the on-disk state of a vault without modifying it.
 *
 * @param scope - Which vault to inspect.
 * @param projectId - Project identifier forwarded to path resolution.
 * @param cwd - Working directory forwarded to path resolution.
 * @returns Initialization flags, page counts and the registry timestamp.
 */
export function getVaultStatus(scope: VaultScope, projectId: string | null, cwd: string): VaultStatus {
  const vaultPath = resolveVaultPath(scope, projectId, cwd);
  const registryPath = join(vaultPath, "meta", "registry.json");
  const initialized = isVaultInitialized(vaultPath);
  const registry = readRegistry(registryPath);

  const status: VaultStatus = {
    path: vaultPath,
    initialized,
    registryExists: existsSync(registryPath),
    pageCount: countMarkdownFiles(vaultPath),
    // Only registry entries generated from a memory record count here.
    recordPageCount: registry?.pages.filter((page) => Boolean(page.source_record_id)).length ?? 0,
    lastSyncedAt: registry?.generated_at ?? null,
  };
  return status;
}
144
+
145
/**
 * Selects the records that belong in a vault: `global`-scoped records for the
 * personal vault, otherwise `project`-scoped records whose `project_id`
 * equals `projectId`.
 */
function recordsForVault(scope: VaultScope, projectId: string | null): RecordRow[] {
  const belongs =
    scope === "personal"
      ? (record: RecordRow): boolean => record.scope === "global"
      : (record: RecordRow): boolean => record.scope === "project" && record.project_id === projectId;
  return listRecords().filter((record) => belongs(record));
}
151
+
152
/**
 * Creates the vault root and its fixed set of sub-directories.
 * Directories that already exist are left alone.
 */
function ensureVaultDirectories(vaultPath: string): void {
  const recordKinds = [
    "decisions",
    "preferences",
    "tasks",
    "error-resolutions",
    "turn-summaries",
    "session-summaries",
    "file-activity",
  ];
  const topLevel = ["concepts", "projects", "sessions", "syntheses", "sources", "meta"];

  const targets = [
    vaultPath,
    join(vaultPath, "records"),
    ...recordKinds.map((kind) => join(vaultPath, "records", kind)),
    ...topLevel.map((name) => join(vaultPath, name)),
  ];

  targets.forEach((dir) => {
    mkdirSync(dir, { recursive: true, mode: 0o700 });
  });
}
174
+
175
/** A vault counts as initialized once both its schema file and its registry exist. */
function isVaultInitialized(vaultPath: string): boolean {
  const required = [join(vaultPath, "WIKI_SCHEMA.md"), join(vaultPath, "meta", "registry.json")];
  return required.every((file) => existsSync(file));
}
178
+
179
/** Writes `content` to `path` with owner-only permissions, but only if nothing exists there yet. */
function writeIfMissing(path: string, content: string): void {
  if (existsSync(path)) return;
  writeFileSync(path, content, { mode: 0o600 });
}
184
+
185
/**
 * Builds a registry with no pages, stamped with the current time.
 * `project_id` is only recorded for project-scoped vaults.
 */
function emptyRegistry(scope: VaultScope, projectId: string | null): VaultRegistry {
  const owningProject = scope === "project" ? projectId : null;
  const registry: VaultRegistry = {
    format: "pi-memory-stone-vault-registry",
    version: VAULT_SCHEMA_VERSION,
    scope,
    project_id: owningProject,
    generated_at: new Date().toISOString(),
    pages: [],
  };
  return registry;
}
195
+
196
/**
 * Renders the `WIKI_SCHEMA.md` document that explains the vault layout.
 *
 * The layout listing includes `file-activity/`, which is one of the record
 * directories the vault creates.
 *
 * @param scope - Scope named in the "Scope" section.
 * @returns Markdown text ending with a newline.
 */
function schemaMarkdown(scope: VaultScope): string {
  const layout = [
    "```txt",
    ".memory-stone/vault/ or ~/.pi/agent/memory/vaults/personal/",
    " index.md",
    " records/",
    " decisions/",
    " preferences/",
    " tasks/",
    " error-resolutions/",
    " turn-summaries/",
    " session-summaries/",
    " file-activity/",
    " concepts/",
    " projects/",
    " sessions/",
    " syntheses/",
    " sources/",
    " meta/registry.json",
    "```",
  ];

  return [
    "# Memory Stone Vault Schema",
    "",
    "This vault is an Obsidian-compatible markdown projection of pi-memory-stone records.",
    "SQLite remains the source of truth; generated pages may be overwritten by `/memory-vault-sync`.",
    "",
    "## Layout",
    "",
    ...layout,
    "",
    "## Scope",
    "",
    `This vault was initialized as a \`${scope}\` vault.`,
    "",
    "## Generated pages",
    "",
    "Generated pages include frontmatter with `generated: true` and `source: pi-memory-stone`.",
    "Use human-authored pages outside `records/` or clear `generated` before treating edits as durable.",
    "",
  ].join("\n");
}
234
+
235
/**
 * Renders `index.md`: a per-kind tally followed by a link to every page.
 *
 * Page titles come from free-form record text, so `[`, `]` and `\` are
 * backslash-escaped in the link text. Parentheses in the target are
 * percent-encoded because `encodeURI` leaves them in place and a `)` would end
 * the link destination early.
 *
 * @param scope - Scope shown in the header.
 * @param projectId - Shown only for project-scoped vaults.
 * @param pages - Pages to list, in the order given.
 * @returns Markdown text ending with a newline.
 */
function indexMarkdown(scope: VaultScope, projectId: string | null, pages: VaultRegistryPage[]): string {
  const byKind = new Map<string, number>();
  for (const page of pages) {
    byKind.set(page.kind, (byKind.get(page.kind) ?? 0) + 1);
  }

  const lines: string[] = ["# Memory Stone Vault", "", `Scope: \`${scope}\``];
  if (projectId && scope === "project") lines.push(`Project: \`${projectId}\``);
  lines.push(`Generated pages: ${pages.length}`, "", "## Records by kind", "");

  if (byKind.size === 0) {
    lines.push("_No synced records yet._");
  } else {
    const tallies = [...byKind.entries()].sort(([a], [b]) => a.localeCompare(b));
    for (const [kind, count] of tallies) {
      lines.push(`- [[${kind}]]: ${count}`);
    }
  }

  lines.push("", "## Generated record pages", "");
  for (const page of pages) {
    const linkText = page.title.replace(/[\\[\]]/g, "\\$&");
    const target = encodeURI(page.path).replace(/[()]/g, (ch) => (ch === "(" ? "%28" : "%29"));
    lines.push(`- [${linkText}](${target})`);
  }
  lines.push("");
  return lines.join("\n");
}
266
+
267
/**
 * Loads and minimally validates the registry file.
 *
 * The file is user-editable JSON, so the parsed value is checked before it is
 * trusted: it must be an object carrying the expected `format` marker and a
 * `pages` array of objects. Callers iterate `pages` directly, so a registry
 * without that array is treated like an unreadable one.
 *
 * @param path - Location of `registry.json`.
 * @returns The registry, or null when the file is missing, unparsable or malformed.
 */
function readRegistry(path: string): VaultRegistry | null {
  if (!existsSync(path)) return null;
  try {
    const parsed: unknown = JSON.parse(readFileSync(path, "utf8"));
    if (typeof parsed !== "object" || parsed === null) return null;

    const candidate = parsed as Partial<VaultRegistry>;
    if (candidate.format !== "pi-memory-stone-vault-registry") return null;

    const pages: unknown = candidate.pages;
    if (!Array.isArray(pages)) return null;
    if (!pages.every((page) => typeof page === "object" && page !== null)) return null;

    return candidate as VaultRegistry;
  } catch {
    // Unreadable or invalid JSON is reported the same way as a missing registry.
    return null;
  }
}
276
+
277
/**
 * Counts `.md` files under `root`, descending into real sub-directories.
 *
 * Directory entries are read with their type information, so no extra `stat`
 * is needed per entry. Symbolic links are never followed into directories,
 * which keeps link cycles from being walked. A link named `*.md` is counted
 * only when it resolves to a regular file; dangling links are skipped rather
 * than aborting the count.
 *
 * @param root - Directory to scan.
 * @returns Number of markdown files, or 0 when `root` does not exist.
 */
function countMarkdownFiles(root: string): number {
  if (!existsSync(root)) return 0;

  const resolvesToFile = (path: string): boolean => {
    try {
      return statSync(path).isFile();
    } catch {
      return false;
    }
  };

  let count = 0;
  const pending = [root];
  for (let dir = pending.pop(); dir !== undefined; dir = pending.pop()) {
    for (const entry of readdirSync(dir, { withFileTypes: true })) {
      const path = join(dir, entry.name);
      if (entry.isDirectory()) {
        pending.push(path);
        continue;
      }
      if (!entry.name.endsWith(".md")) continue;
      if (entry.isFile() || (entry.isSymbolicLink() && resolvesToFile(path))) {
        count += 1;
      }
    }
  }
  return count;
}
295
+
296
/** Collapses every run of `/` or `\` separators into a single `/`. */
function normalizePath(path: string): string {
  return path.replace(/[\\/]+/g, "/");
}
299
+
300
/** Returns the first 16 hex characters of the SHA-256 digest of `content`. */
function sha256(content: string): string {
  const hasher = createHash("sha256");
  hasher.update(content);
  const hex = hasher.digest("hex");
  return hex.substring(0, 16);
}
303
+
304
/**
 * Expresses `filePath` relative to `vaultPath`, using `/` separators
 * regardless of platform.
 */
export function relativeVaultPath(vaultPath: string, filePath: string): string {
  const platformRelative = relative(vaultPath, filePath);
  return normalizePath(platformRelative);
}
@@ -0,0 +1,37 @@
1
+ /** Natural-language intent parsing for lightweight vault capture requests. */
2
+
3
+ import { isVaultScope, type VaultScope } from "./paths.js";
4
+
5
+ const URL_PATTERN = /https?:\/\/[^\s<>)\]"']+/i;
6
+
7
/** A request, recognized in free text, to capture a URL into a vault. */
export interface VaultCaptureIntent {
  /** First URL found in the prompt, with trailing punctuation removed. */
  url: string;
  /** Target vault; `project` unless the prompt asks for the personal vault. */
  scope: VaultScope;
}
11
+
12
/**
 * Detects a "capture this URL into the vault" request in a prompt.
 *
 * A prompt qualifies when it contains a URL, the word "vault", and one of the
 * capture verbs (add, capture, save, store, clip, archive, ingest).
 *
 * The keyword checks run on the prompt with its URLs blanked out. Otherwise a
 * link such as `https://example.com/vault/add` would satisfy both checks by
 * itself, whatever the user actually asked for.
 *
 * @param prompt - Raw user prompt.
 * @returns The URL and target scope, or null when the prompt is not a capture request.
 */
export function parseVaultCaptureIntent(prompt: string): VaultCaptureIntent | null {
  const url = extractFirstUrl(prompt);
  if (!url) return null;

  // A fresh global copy of the pattern removes every URL, not just the first.
  const everyUrl = new RegExp(URL_PATTERN.source, "gi");
  const instruction = prompt.replace(everyUrl, " ").toLowerCase();

  if (!instruction.includes("vault")) return null;
  if (!/\b(add|capture|save|store|clip|archive|ingest)\b/.test(instruction)) return null;

  return {
    url,
    scope: inferScope(instruction),
  };
}
25
+
26
/**
 * Finds the first http(s) URL in `text`.
 *
 * @returns The URL without trailing sentence punctuation (`. , ; : ! ?`), or
 * null when the text contains no URL.
 */
export function extractFirstUrl(text: string): string | null {
  const found = URL_PATTERN.exec(text);
  return found === null ? null : found[0].replace(/[.,;:!?]+$/, "");
}
31
+
32
/**
 * Picks the vault scope named in an already lower-cased prompt.
 *
 * An explicit `--scope project|personal` wins. Otherwise `--personal` or a
 * mention of the personal/global (memory) vault selects `personal`, and
 * everything else defaults to `project`.
 */
function inferScope(lowerPrompt: string): VaultScope {
  const explicitMatch = /--scope\s+(project|personal)\b/.exec(lowerPrompt);
  const explicit = explicitMatch?.[1];
  if (explicit && isVaultScope(explicit)) return explicit;

  const personalHint = /--personal\b|\bpersonal(?:\s+memory)?\s+vault\b|\bglobal(?:\s+memory)?\s+vault\b/;
  return personalHint.test(lowerPrompt) ? "personal" : "project";
}
@@ -0,0 +1,120 @@
1
+ /** Markdown rendering helpers for Obsidian-compatible memory vault pages. */
2
+
3
+ import type { RecordRow } from "../db/index.js";
4
+
5
/** Display prefix for each known record kind. */
const KIND_TITLES: Record<string, string> = {
  decision: "Decision",
  preference: "Preference",
  task: "Task",
  error_resolution: "Error Resolution",
  turn_summary: "Turn Summary",
  session_summary: "Session Summary",
  file_activity: "File Activity",
};

/**
 * Builds a page title of the form `<Kind>: <excerpt>`.
 *
 * The excerpt is the first non-blank line of the record text (falling back to
 * the record id), with leading `#` markers removed and whitespace collapsed.
 * Excerpts longer than 80 UTF-16 units are cut to at most 77 plus `...`.
 *
 * @param record - Record to title; reads `kind`, `text` and `id`.
 * @returns The title string.
 */
export function recordTitle(record: RecordRow): string {
  // Own-property check so kinds such as "constructor" do not resolve to Object.prototype members.
  const knownPrefix = Object.prototype.hasOwnProperty.call(KIND_TITLES, record.kind)
    ? KIND_TITLES[record.kind]
    : undefined;
  const prefix = knownPrefix ?? record.kind;

  const firstLine = record.text.split(/\r?\n/).map((line) => line.trim()).find(Boolean) ?? record.id;
  const cleaned = firstLine.replace(/^#+\s*/, "").replace(/\s+/g, " ").trim();

  let excerpt = cleaned;
  if (cleaned.length > 80) {
    let cut = 77;
    // Do not cut between the two halves of a surrogate pair (e.g. an emoji).
    const lastUnit = cleaned.charCodeAt(cut - 1);
    if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) cut -= 1;
    excerpt = `${cleaned.slice(0, cut)}...`;
  }
  return `${prefix}: ${excerpt}`;
}
22
+
23
/**
 * Maps a record kind to its directory name under `records/`.
 * Kinds outside the predefined set fall back to a slug of the kind itself.
 */
export function kindDirectory(kind: string): string {
  const known = new Map<string, string>([
    ["decision", "decisions"],
    ["preference", "preferences"],
    ["task", "tasks"],
    ["error_resolution", "error-resolutions"],
    ["turn_summary", "turn-summaries"],
    ["session_summary", "session-summaries"],
    ["file_activity", "file-activity"],
  ]);
  return known.get(kind) ?? sanitizeSlug(kind);
}
35
+
36
/**
 * Renders a memory record as a vault page: YAML frontmatter, a heading, the
 * record text, a metadata list and a links section.
 *
 * @param record - Record to render.
 * @returns Markdown text ending with a newline.
 */
export function recordMarkdown(record: RecordRow): string {
  const title = recordTitle(record);
  const tags = parseTags(record.tags);
  const frontmatter = renderFrontmatter({
    id: record.id,
    kind: record.kind,
    scope: record.scope,
    project_id: record.project_id,
    session_id: record.session_id,
    created_at: new Date(record.created_at).toISOString(),
    updated_at: new Date(record.updated_at).toISOString(),
    tags,
    generated: true,
    source: "pi-memory-stone",
  });

  const body = record.text.trim() || "_No memory text captured._";

  // Optional metadata lines are only emitted when the record carries the value.
  const metadata = [
    `- Memory ref: \`${record.id}\``,
    `- Kind: \`${record.kind}\``,
    `- Scope: \`${record.scope}\``,
  ];
  if (record.project_id) metadata.push(`- Project: \`${record.project_id}\``);
  if (record.session_id) metadata.push(`- Session: \`${record.session_id}\``);
  if (tags.length > 0) metadata.push(`- Tags: ${tags.map((tag) => `#${sanitizeTag(tag)}`).join(" ")}`);

  const page = [
    frontmatter,
    `# ${escapeMarkdownHeading(title)}`,
    "",
    body,
    "",
    "## Metadata",
    "",
    ...metadata,
    "",
    "## Links",
    "",
    `- [[${record.kind}]]`,
    "- [[pi-memory-stone]]",
    "",
  ];
  return page.join("\n");
}
75
+
76
/**
 * Splits a stored tag string on `,` or `;` into trimmed, non-empty tags.
 * A missing or empty input yields an empty list.
 */
export function parseTags(tags: string | null | undefined): string[] {
  const parsed: string[] = [];
  if (!tags) return parsed;

  for (const piece of tags.split(/[;,]/)) {
    const tag = piece.trim();
    if (tag) parsed.push(tag);
  }
  return parsed;
}
83
+
84
/**
 * Produces a lower-case slug containing only `a-z`, `0-9` and single hyphens,
 * with no leading or trailing hyphen. Returns `"untitled"` when nothing usable remains.
 */
export function sanitizeSlug(value: string): string {
  const words = value
    .toLowerCase()
    .split(/[^a-z0-9]+/)
    .filter((word) => word.length > 0);
  return words.length > 0 ? words.join("-") : "untitled";
}
91
+
92
/**
 * Makes a tag safe to write after `#`: whitespace runs become `-`, then every
 * character that is not a Unicode letter, a Unicode number, `_`, `/` or `-` is dropped.
 */
function sanitizeTag(tag: string): string {
  const allowed = /^[\p{L}\p{N}_/-]$/u;
  const hyphenated = tag.replace(/\s+/g, "-");
  // Array.from walks code points, matching how the `u`-flagged pattern sees the string.
  return Array.from(hyphenated)
    .filter((char) => allowed.test(char))
    .join("");
}
95
+
96
/** Flattens a heading onto one line: each run of line breaks becomes a space, then the ends are trimmed. */
function escapeMarkdownHeading(value: string): string {
  return value.split(/[\r\n]+/).join(" ").trim();
}
99
+
100
/** Values that can be written to the YAML frontmatter block. */
type FrontmatterValue = string | number | boolean | null | string[];

/**
 * Renders a `---` delimited frontmatter block, one `key: value` line per entry
 * in insertion order. Null values are omitted. Arrays use flow syntax with
 * quoted items, booleans and numbers are written bare, and strings are quoted.
 */
function renderFrontmatter(values: Record<string, FrontmatterValue>): string {
  const entries: string[] = [];

  Object.entries(values).forEach(([key, value]) => {
    if (value === null) return;

    let rendered: string;
    if (Array.isArray(value)) {
      rendered = `[${value.map((item) => yamlString(item)).join(", ")}]`;
    } else if (typeof value === "boolean" || typeof value === "number") {
      rendered = `${value}`;
    } else {
      rendered = `${yamlString(value)}`;
    }
    entries.push(`${key}: ${rendered}`);
  });

  return ["---", ...entries, "---"].join("\n");
}
117
+
118
/** Quotes a string for frontmatter by serializing it as a JSON string literal. */
function yamlString(value: string): string {
  const quoted = JSON.stringify(value);
  return quoted;
}
@@ -0,0 +1,44 @@
1
+ /**
2
+ * Knowledge vault path resolution.
3
+ *
4
+ * Project vaults are opt-in and live inside the current project. Personal
5
+ * vaults stay under pi-memory-stone's private memory directory by default.
6
+ */
7
+
8
+ import { join } from "node:path";
9
+ import { getMemoryDir } from "../config/index.js";
10
+
11
+ export type VaultScope = "project" | "personal";
12
+
13
/**
 * Resolves the root directory of a vault.
 *
 * Project vaults live at `<projectId or cwd>/.memory-stone/vault`. The
 * personal vault uses `PI_MEMORY_STONE_PERSONAL_VAULT_PATH` when that
 * variable is set, otherwise `vaults/personal` under the memory directory.
 *
 * NOTE(review): `projectId` is joined as a filesystem root here, so it is
 * presumably a directory path. Confirm against callers.
 */
export function resolveVaultPath(scope: VaultScope, projectId: string | null, cwd: string): string {
  if (scope !== "personal") {
    return join(projectId ?? cwd, ".memory-stone", "vault");
  }

  const override = process.env.PI_MEMORY_STONE_PERSONAL_VAULT_PATH;
  return override ?? join(getMemoryDir(), "vaults", "personal");
}
22
+
23
/**
 * Resolves the directory that holds one captured source packet.
 *
 * Personal captures go under `source-packets/personal/<captureId>` in the
 * memory directory. Project captures go under
 * `<projectId or cwd>/.memory-stone/source-packets/<captureId>`.
 */
export function resolveSourcePacketPath(scope: VaultScope, projectId: string | null, cwd: string, captureId: string): string {
  const isPersonal = scope === "personal";
  const base = isPersonal
    ? join(getMemoryDir(), "source-packets", "personal")
    : join(projectId ?? cwd, ".memory-stone", "source-packets");
  return join(base, captureId);
}
31
+
32
/** Type guard: true only for the exact strings `"project"` and `"personal"`. */
export function isVaultScope(value: string): value is VaultScope {
  switch (value) {
    case "project":
    case "personal":
      return true;
    default:
      return false;
  }
}
35
+
36
/**
 * Reads the vault scope from parsed command arguments.
 *
 * A non-empty `scope` option decides the result on its own: its value when
 * valid, `undefined` when not. Without it, the `personal` flag selects the
 * personal vault and everything else resolves to `project`.
 */
export function parseVaultScope(args: { flags: Set<string>; options: Map<string, string> }): VaultScope | undefined {
  const explicitScope = args.options.get("scope");
  if (explicitScope) {
    if (isVaultScope(explicitScope)) return explicitScope;
    return undefined;
  }

  // `project` is also the default, so only the `personal` flag changes the outcome.
  return args.flags.has("personal") ? "personal" : "project";
}
@@ -0,0 +1,65 @@
1
+ /** Extraction quality scoring for captured source pages. */
2
+
3
/** Overall verdict on an extraction. */
export type CaptureQuality = "good" | "weak";

/** Result of scoring one captured page. */
export interface CaptureQualityReport {
  /** `good` when `score` is at least 0.65, otherwise `weak`. */
  quality: CaptureQuality;
  /** Heuristic score rounded to two decimals and capped at 1. */
  score: number;
  /** One entry per heuristic that did not earn its full points. */
  warnings: string[];
  /** Length of the markdown after it is reduced to plain text. */
  plainTextChars: number;
}
11
+
12
/**
 * Scores how article-like an extracted page is.
 *
 * Points: title present (0.2); plain text of 500+ chars (0.35) or 120+ chars
 * (0.18); three or more paragraphs of 40+ plain chars (0.2); at most 35%
 * link-only lines (0.15); a known extractor (0.1). Every missed heuristic
 * except the extractor one adds a warning.
 *
 * Paragraphs are separated by blank lines with either LF or CRLF endings, so
 * content fetched with Windows line endings is not treated as one paragraph.
 *
 * @param input - Title, extracted markdown and the extractor name.
 * @returns Verdict, score, warnings and the plain-text length.
 */
export function assessCaptureQuality(input: { title: string; markdown: string; extractor: string }): CaptureQualityReport {
  const plain = markdownToPlainText(input.markdown);
  const warnings: string[] = [];
  let score = 0;

  if (input.title.trim().length > 0) score += 0.2;
  else warnings.push("missing title");

  if (plain.length >= 500) score += 0.35;
  else if (plain.length >= 120) {
    score += 0.18;
    warnings.push("short extracted text");
  } else {
    warnings.push("very short extracted text");
  }

  const paragraphCount = input.markdown
    .split(/(?:\r?\n){2,}/)
    .filter((paragraph) => markdownToPlainText(paragraph).length >= 40).length;
  if (paragraphCount >= 3) score += 0.2;
  else warnings.push("few article-like paragraphs");

  const linkOnlyRatio = linkOnlyLineRatio(input.markdown);
  if (linkOnlyRatio <= 0.35) score += 0.15;
  else warnings.push("high link/navigation ratio");

  if (input.extractor === "html-readability" || input.extractor === "markdown" || input.extractor === "text") {
    score += 0.1;
  }

  // Round away floating-point drift from the additions above before comparing.
  score = Math.min(1, Number(score.toFixed(2)));
  return {
    quality: score >= 0.65 ? "good" : "weak",
    score,
    warnings,
    plainTextChars: plain.length,
  };
}
48
+
49
/**
 * Reduces markdown to rough plain text for length measurements.
 *
 * In order: fenced code, inline code and images are blanked; links keep only
 * their text; runs of `# > * _ ~ -` become a space; whitespace is collapsed
 * and the result trimmed.
 */
function markdownToPlainText(markdown: string): string {
  const passes: Array<[RegExp, string]> = [
    [/```[\s\S]*?```/g, " "],
    [/`[^`]*`/g, " "],
    [/!\[[^\]]*]\([^)]*\)/g, " "],
    [/\[([^\]]+)]\([^)]*\)/g, "$1"],
    [/[#>*_~\-]+/g, " "],
    [/\s+/g, " "],
  ];

  let text = markdown;
  for (const [pattern, replacement] of passes) {
    text = text.replace(pattern, replacement);
  }
  return text.trim();
}
59
+
60
/**
 * Fraction of non-blank lines that consist of nothing but a link.
 *
 * A line is link-only when, after an optional `-`/`*` bullet, it is exactly
 * one inline link `[text](target)`, one reference link `[text][ref]`, or
 * starts with a bare http(s) URL. Bracketed text must be followed by a link
 * target, so task items (`- [x] done`) and citations (`[1] Smith ...`) count
 * as prose.
 *
 * @param markdown - Extracted markdown.
 * @returns A ratio in [0, 1]; 1 when there are no non-blank lines.
 */
function linkOnlyLineRatio(markdown: string): number {
  const lines = markdown.split(/\r?\n/).map((line) => line.trim()).filter(Boolean);
  if (lines.length === 0) return 1;

  const markdownLinkOnly = /^[-*]?\s*\[[^\]]+\](?:\([^)]*\)|\[[^\]]*\])\s*$/;
  const bareUrl = /^[-*]?\s*https?:\/\//;
  const linkish = lines.filter((line) => markdownLinkOnly.test(line) || bareUrl.test(line)).length;
  return linkish / lines.length;
}
@@ -0,0 +1,113 @@
1
+ /** URL normalization and article-capture candidate resolution. */
2
+
3
/** Content type guessed for a capture candidate, based on the URL's file extension. */
export type CaptureCandidateKind = "html" | "markdown" | "text" | "pdf" | "unknown";

/** One URL worth trying when capturing a page. */
export interface CaptureCandidate {
  /** Absolute URL to fetch. */
  url: string;
  /** Expected content type. */
  kind: CaptureCandidateKind;
  /** How the candidate was derived: `gist-raw`, `github-raw` or `direct`. */
  strategy: string;
  /** Ranking weight; candidates are ordered from highest to lowest. */
  priority: number;
}

/** Resolution result for one input URL. */
export interface CaptureTargets {
  /** The parsed input URL as serialized by the URL parser. */
  originalUrl: string;
  /** URL of the highest-priority candidate. */
  canonicalUrl: string;
  /** De-duplicated candidates, best first. */
  candidates: CaptureCandidate[];
}
17
+
18
/**
 * Works out which URLs to try for a capture, best first.
 *
 * Raw gist and raw GitHub file URLs outrank the URL as given, which is always
 * included as the `direct` fallback.
 *
 * @param inputUrl - URL supplied by the user.
 * @returns The normalized input URL, the preferred URL and all candidates.
 * @throws Error when `inputUrl` is not a valid http(s) URL.
 */
export function resolveCaptureTargets(inputUrl: string): CaptureTargets {
  const parsed = parseHttpUrl(inputUrl);
  const proposed: CaptureCandidate[] = [];

  const gistRaw = resolveGistRaw(parsed);
  if (gistRaw) {
    proposed.push({ url: gistRaw, kind: "markdown", strategy: "gist-raw", priority: 100 });
  }

  const githubRaw = resolveGithubRaw(parsed);
  if (githubRaw) {
    const { url, kind } = githubRaw;
    proposed.push({ url, kind, strategy: "github-raw", priority: 95 });
  }

  const direct: CaptureCandidate = {
    url: parsed.href,
    kind: inferKind(parsed.href),
    strategy: "direct",
    priority: 10,
  };
  proposed.push(direct);

  // De-duplicate first so that, for equal URLs, the earlier (more specific) strategy is kept.
  const ranked = dedupeCandidates(proposed).sort((a, b) => b.priority - a.priority);
  const best = ranked[0];

  return {
    originalUrl: parsed.href,
    canonicalUrl: best?.url ?? parsed.href,
    candidates: ranked,
  };
}
48
+
49
/**
 * Parses `url` and insists on an http or https scheme.
 *
 * @throws Error `Invalid URL: <url>` when the string cannot be parsed.
 * @throws Error when the scheme is anything other than http/https.
 */
export function parseHttpUrl(url: string): URL {
  const allowedProtocols = ["http:", "https:"];

  let parsed: URL | undefined;
  try {
    parsed = new URL(url);
  } catch {
    parsed = undefined;
  }

  if (parsed === undefined) {
    throw new Error(`Invalid URL: ${url}`);
  }
  if (!allowedProtocols.includes(parsed.protocol)) {
    throw new Error("Only http:// and https:// URLs can be captured");
  }
  return parsed;
}
61
+
62
/**
 * Derives the raw-content URL for a gist.
 *
 * URLs already on `gist.githubusercontent.com` whose path contains `/raw` are
 * returned as is. `gist.github.com/<owner>/<gistId>` pages are rewritten to
 * the owner's raw endpoint. Anything else yields null.
 */
function resolveGistRaw(url: URL): string | null {
  switch (url.hostname) {
    case "gist.githubusercontent.com":
      return url.pathname.includes("/raw") ? url.href : null;
    case "gist.github.com":
      break;
    default:
      return null;
  }

  const segments = url.pathname.split("/").filter(Boolean);
  if (segments.length < 2) return null;

  const owner = segments[0];
  const gistId = segments[1];
  if (!owner || !gistId) return null;

  return `https://gist.githubusercontent.com/${encodeURIComponent(owner)}/${encodeURIComponent(gistId)}/raw`;
}
76
+
77
/**
 * Normalizes one path segment taken from `URL.pathname` for reuse in a new URL.
 * Such segments are already percent-encoded, so they are decoded before being
 * encoded again; encoding them directly would turn `%20` into `%2520`.
 */
function reencodePathSegment(segment: string): string {
  try {
    return encodeURIComponent(decodeURIComponent(segment));
  } catch {
    // Malformed escape sequence: keep the segment exactly as the URL parser produced it.
    return segment;
  }
}

/**
 * Derives the raw-content URL for a file viewed on GitHub.
 *
 * `raw.githubusercontent.com` URLs are returned as is. On `github.com`, paths
 * of the form `/<owner>/<repo>/(blob|raw)/<branch>/<file...>` are rewritten to
 * `raw.githubusercontent.com`, keeping the query string. The fourth segment is
 * taken as the whole branch name, so branches containing `/` are not resolved
 * correctly.
 *
 * @returns The raw URL with its inferred kind, or null when the URL is not a GitHub file view.
 */
function resolveGithubRaw(url: URL): { url: string; kind: CaptureCandidateKind } | null {
  if (url.hostname === "raw.githubusercontent.com") {
    return { url: url.href, kind: inferKind(url.href) };
  }
  if (url.hostname !== "github.com") return null;

  const parts = url.pathname.split("/").filter(Boolean);
  if (parts.length < 5) return null;

  const [owner, repo, mode, branch, ...fileParts] = parts;
  if (!owner || !repo || !branch || fileParts.length === 0) return null;
  if (mode !== "blob" && mode !== "raw") return null;

  const rawPath = [owner, repo, branch, ...fileParts].map(reencodePathSegment).join("/");
  const rawUrl = new URL(`https://raw.githubusercontent.com/${rawPath}`);
  rawUrl.search = url.search;
  return { url: rawUrl.href, kind: inferKind(rawUrl.href) };
}

/** Guesses the content kind from the file extension of the URL's path (case-insensitive). */
function inferKind(url: string): CaptureCandidateKind {
  const path = new URL(url).pathname.toLowerCase();
  const byExtension: Array<[CaptureCandidateKind, string[]]> = [
    ["markdown", [".md", ".markdown", ".mdx"]],
    ["text", [".txt", ".text"]],
    ["pdf", [".pdf"]],
    ["html", [".html", ".htm"]],
  ];
  for (const [kind, extensions] of byExtension) {
    if (extensions.some((extension) => path.endsWith(extension))) return kind;
  }
  return "unknown";
}
103
+
104
/** Drops candidates whose URL has already been seen, keeping the first occurrence and the original order. */
function dedupeCandidates(candidates: CaptureCandidate[]): CaptureCandidate[] {
  const seenUrls = new Set<string>();
  return candidates.filter(({ url }) => {
    if (seenUrls.has(url)) return false;
    seenUrls.add(url);
    return true;
  });
}