@djolex999/vir-cli 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/claude/updater.ts +0 -273
- package/src/cli.ts +0 -953
- package/src/config.ts +0 -89
- package/src/daemon/launchd.ts +0 -115
- package/src/dedupe/detector.ts +0 -197
- package/src/dedupe/merger.ts +0 -172
- package/src/lint/linter.ts +0 -286
- package/src/pipeline/distiller.ts +0 -280
- package/src/pipeline/filter.ts +0 -43
- package/src/pipeline/parser.ts +0 -118
- package/src/pipeline/run.ts +0 -378
- package/src/pipeline/scanner.ts +0 -51
- package/src/pipeline/scrubber.ts +0 -55
- package/src/pipeline/summarizer.ts +0 -204
- package/src/pipeline/types.ts +0 -41
- package/src/pipeline/writer.ts +0 -242
- package/src/search/embedder.ts +0 -88
- package/src/search/retriever.ts +0 -255
- package/src/search/synthesizer.ts +0 -45
- package/src/state/db.ts +0 -451
- package/src/ui/display.ts +0 -184
- package/tsconfig.json +0 -23
- package/vir-flow.html +0 -708
package/src/config.ts
DELETED
|
@@ -1,89 +0,0 @@
|
|
|
1
|
-
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "node:fs";
|
|
2
|
-
import { homedir } from "node:os";
|
|
3
|
-
import { dirname, join } from "node:path";
|
|
4
|
-
import { z } from "zod";
|
|
5
|
-
|
|
6
|
-
/** Model identifiers applied when the config omits `models` or one of its fields. */
const DEFAULT_MODELS = {
  classify: "claude-haiku-4-5-20251001",
  distill: "claude-sonnet-4-6",
};

/**
 * Validation schema for the vir configuration object (the JSON that
 * `loadConfig` reads from `CONFIG_PATH`).
 *
 * Besides per-field validation and defaults, the refinement step enforces
 * that the API key matching the selected `provider` is present.
 */
export const ConfigSchema = z
  .object({
    vaultPath: z.string().min(1),
    outputDir: z.string().min(1).default("vir"),
    claudeProjectsDir: z.string().min(1),
    cadenceHours: z.number().positive().default(4),
    provider: z.enum(["anthropic", "kie"]).default("anthropic"),
    anthropicApiKey: z.string().optional(),
    kieApiKey: z.string().optional(),
    filterThreshold: z.number().min(0).max(1).default(0.4),
    models: z
      .object({
        classify: z.string().default(DEFAULT_MODELS.classify),
        distill: z.string().default(DEFAULT_MODELS.distill),
      })
      .default({ ...DEFAULT_MODELS }),
  })
  .superRefine((cfg, ctx) => {
    const missingAnthropicKey =
      cfg.provider === "anthropic" && !cfg.anthropicApiKey;
    const missingKieKey = cfg.provider === "kie" && !cfg.kieApiKey;

    if (missingAnthropicKey) {
      ctx.addIssue({
        code: z.ZodIssueCode.custom,
        path: ["anthropicApiKey"],
        message: "anthropicApiKey is required when provider is 'anthropic'",
      });
    }
    if (missingKieKey) {
      ctx.addIssue({
        code: z.ZodIssueCode.custom,
        path: ["kieApiKey"],
        message: "kieApiKey is required when provider is 'kie'",
      });
    }
  });
|
|
42
|
-
|
|
43
|
-
export type Config = z.infer<typeof ConfigSchema>;
|
|
44
|
-
|
|
45
|
-
export const VIR_DIR = join(homedir(), ".vir");
|
|
46
|
-
export const CONFIG_PATH = join(VIR_DIR, "config.json");
|
|
47
|
-
export const STATE_PATH = join(VIR_DIR, "vir.db");
|
|
48
|
-
// One-shot migration: older builds used `state.db`. If only the old file
|
|
49
|
-
// exists, rename it on startup so the docs match reality. Never overwrites
|
|
50
|
-
// an existing `vir.db`.
|
|
51
|
-
export const LEGACY_STATE_PATH = join(VIR_DIR, "state.db");
|
|
52
|
-
export const DAEMON_LOG_PATH = join(VIR_DIR, "daemon.log");
|
|
53
|
-
|
|
54
|
-
/**
 * Expands a leading `~` to the current user's home directory.
 *
 * Only the bare `~` and the `~/...` forms are expanded; every other input
 * (including `~user/...`) is returned unchanged.
 *
 * @param p Path that may start with `~`.
 * @returns The path with the home directory substituted where applicable.
 */
export function expandHome(p: string): string {
  if (p === "~") return homedir();
  return p.startsWith("~/") ? join(homedir(), p.slice(2)) : p;
}
|
|
59
|
-
|
|
60
|
-
/** Creates `VIR_DIR` (and any missing parents) when it does not exist yet. */
export function ensureVirDir(): void {
  if (existsSync(VIR_DIR)) return;
  mkdirSync(VIR_DIR, { recursive: true });
}
|
|
63
|
-
|
|
64
|
-
/** Reports whether a file currently exists at `CONFIG_PATH`. */
export function configExists(): boolean {
  const present = existsSync(CONFIG_PATH);
  return present;
}
|
|
67
|
-
|
|
68
|
-
/**
 * Reads, validates and normalizes the configuration stored at `CONFIG_PATH`.
 *
 * @returns The parsed config with `vaultPath` and `claudeProjectsDir`
 *   passed through `expandHome`.
 * @throws Error when the file is missing or is not valid JSON (the message
 *   names the file), and whatever `ConfigSchema.parse` throws when the JSON
 *   does not satisfy the schema.
 */
export function loadConfig(): Config {
  if (!existsSync(CONFIG_PATH)) {
    throw new Error(
      `Config not found at ${CONFIG_PATH}. Run \`vir init\` first.`,
    );
  }
  const raw = readFileSync(CONFIG_PATH, "utf8");

  // Parse separately so a hand-edited, malformed file produces an error that
  // says which file is broken instead of a bare SyntaxError.
  let json: unknown;
  try {
    json = JSON.parse(raw);
  } catch (err) {
    const detail = err instanceof Error ? err.message : String(err);
    throw new Error(`Config at ${CONFIG_PATH} is not valid JSON: ${detail}`);
  }

  const parsed = ConfigSchema.parse(json);
  return {
    ...parsed,
    vaultPath: expandHome(parsed.vaultPath),
    claudeProjectsDir: expandHome(parsed.claudeProjectsDir),
  };
}
|
|
82
|
-
|
|
83
|
-
/**
 * Serializes `cfg` as pretty-printed JSON and writes it to `CONFIG_PATH`,
 * creating the containing directory first when needed.
 *
 * The config may carry API keys (`anthropicApiKey` / `kieApiKey`), so the
 * file is created readable and writable by the owner only. The mode is
 * applied only when the file is created; the permissions of an already
 * existing config file are left as they are.
 *
 * @param cfg Configuration to persist.
 */
export function saveConfig(cfg: Config): void {
  ensureVirDir();
  const configDir = dirname(CONFIG_PATH);
  if (!existsSync(configDir)) {
    mkdirSync(configDir, { recursive: true });
  }
  writeFileSync(CONFIG_PATH, JSON.stringify(cfg, null, 2), { mode: 0o600 });
}
|
package/src/daemon/launchd.ts
DELETED
|
@@ -1,115 +0,0 @@
|
|
|
1
|
-
import { spawnSync } from "node:child_process";
|
|
2
|
-
import { existsSync, mkdirSync, unlinkSync, writeFileSync } from "node:fs";
|
|
3
|
-
import { homedir } from "node:os";
|
|
4
|
-
import { dirname, join } from "node:path";
|
|
5
|
-
import { DAEMON_LOG_PATH } from "../config.js";
|
|
6
|
-
|
|
7
|
-
export const LABEL = "lab.growthq.vir";
|
|
8
|
-
const LAUNCH_AGENTS_DIR = join(homedir(), "Library", "LaunchAgents");
|
|
9
|
-
const PLIST_PATH = join(LAUNCH_AGENTS_DIR, `${LABEL}.plist`);
|
|
10
|
-
|
|
11
|
-
/** Returns the absolute path of the LaunchAgent plist managed by this module. */
export function plistPath(): string {
  const location = PLIST_PATH;
  return location;
}
|
|
14
|
-
|
|
15
|
-
/** XML entity for each character that must not appear raw in plist text. */
const XML_ENTITIES: Record<string, string> = {
  "&": "&amp;",
  "<": "&lt;",
  ">": "&gt;",
  '"': "&quot;",
  "'": "&apos;",
};

/**
 * Escapes the five XML special characters so `s` can be embedded safely in
 * element content or attribute values.
 *
 * The replacement is done in a single pass, so the `&` of an entity that was
 * just produced is never escaped a second time.
 *
 * @param s Raw text.
 * @returns Text with `& < > " '` replaced by their XML entities.
 */
function escapeXml(s: string): string {
  return s.replace(/[&<>"']/g, (ch) => XML_ENTITIES[ch] ?? ch);
}
|
|
23
|
-
|
|
24
|
-
/**
 * Renders the launchd property list for the vir agent.
 *
 * The job runs `<nodePath> <cliPath> run --daemon`, starts at load, repeats
 * every `intervalSeconds`, and sends both stdout and stderr to `logPath`.
 * All string values are XML-escaped before being embedded.
 *
 * @param opts.nodePath Node executable placed first in ProgramArguments.
 * @param opts.cliPath CLI entry script passed to node.
 * @param opts.intervalSeconds Value written to `StartInterval`.
 * @param opts.logPath File used for StandardOutPath and StandardErrorPath.
 * @returns The complete plist XML document.
 */
export function renderPlist(opts: {
  nodePath: string;
  cliPath: string;
  intervalSeconds: number;
  logPath: string;
}): string {
  const label = escapeXml(LABEL);
  const node = escapeXml(opts.nodePath);
  const cli = escapeXml(opts.cliPath);
  const log = escapeXml(opts.logPath);
  const interval = opts.intervalSeconds;

  return `<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>Label</key>
<string>${label}</string>
<key>ProgramArguments</key>
<array>
<string>${node}</string>
<string>${cli}</string>
<string>run</string>
<string>--daemon</string>
</array>
<key>StartInterval</key>
<integer>${interval}</integer>
<key>RunAtLoad</key>
<true/>
<key>StandardOutPath</key>
<string>${log}</string>
<key>StandardErrorPath</key>
<string>${log}</string>
<key>EnvironmentVariables</key>
<dict>
<key>PATH</key>
<string>/usr/local/bin:/opt/homebrew/bin:/usr/bin:/bin</string>
</dict>
</dict>
</plist>
`;
}
|
|
60
|
-
|
|
61
|
-
/**
 * Runs `launchctl` synchronously with the given arguments.
 *
 * @param args Arguments passed to `launchctl`.
 * @returns `code` is the exit status, or -1 when no status is available
 *   (for example the process could not be spawned). `stderr` is the captured
 *   standard error; when that is empty and spawning itself failed, it holds
 *   the spawn error message so callers never report a blank failure reason.
 */
function launchctl(args: string[]): { code: number; stderr: string } {
  const res = spawnSync("launchctl", args, { encoding: "utf8" });
  const stderr = res.stderr || res.error?.message || "";
  return { code: res.status ?? -1, stderr };
}
|
|
65
|
-
|
|
66
|
-
/**
 * Writes the LaunchAgent plist and (re)loads it with `launchctl`.
 *
 * Missing directories for the plist and for the daemon log are created
 * first. `cadenceHours` is converted to whole seconds with a floor of 60.
 * Any previously loaded copy is unloaded before loading; a failure of that
 * unload is ignored.
 *
 * @param opts.nodePath Node executable the job should run.
 * @param opts.cliPath CLI entry script passed to node.
 * @param opts.cadenceHours Interval between runs, in hours.
 * @returns The plist location and `loaded: true`.
 * @throws Error when `launchctl load` exits with a non-zero status.
 */
export function installPlist(opts: {
  nodePath: string;
  cliPath: string;
  cadenceHours: number;
}): { plistPath: string; loaded: boolean } {
  for (const dir of [LAUNCH_AGENTS_DIR, dirname(DAEMON_LOG_PATH)]) {
    if (!existsSync(dir)) mkdirSync(dir, { recursive: true });
  }

  const intervalSeconds = Math.max(60, Math.round(opts.cadenceHours * 3600));
  writeFileSync(
    PLIST_PATH,
    renderPlist({
      nodePath: opts.nodePath,
      cliPath: opts.cliPath,
      intervalSeconds,
      logPath: DAEMON_LOG_PATH,
    }),
  );

  // Unload a possibly existing job first; its result is intentionally ignored.
  launchctl(["unload", PLIST_PATH]);
  const { code, stderr } = launchctl(["load", PLIST_PATH]);
  if (code !== 0) {
    throw new Error(`launchctl load failed: ${stderr.trim()}`);
  }

  return { plistPath: PLIST_PATH, loaded: true };
}
|
|
95
|
-
|
|
96
|
-
/**
 * Unloads the LaunchAgent and deletes its plist file.
 *
 * @returns `removed: false` when no plist file existed, `true` after the
 *   file was unloaded (result ignored) and deleted.
 */
export function uninstallPlist(): { removed: boolean } {
  const present = existsSync(PLIST_PATH);
  if (present) {
    launchctl(["unload", PLIST_PATH]);
    unlinkSync(PLIST_PATH);
  }
  return { removed: present };
}
|
|
102
|
-
|
|
103
|
-
/**
 * Reports whether the LaunchAgent plist exists on disk and whether a job
 * with this module's `LABEL` shows up in `launchctl list`.
 *
 * The label is compared against the last whitespace-separated column of each
 * output line, so other jobs whose label merely contains `LABEL` as a
 * substring are not mistaken for this one.
 *
 * @returns `installed` (plist file exists), `loaded` (label listed by
 *   launchctl; false when the command fails) and the plist path.
 */
export function daemonStatus(): {
  installed: boolean;
  loaded: boolean;
  plistPath: string;
} {
  const installed = existsSync(PLIST_PATH);
  const res = spawnSync("launchctl", ["list"], { encoding: "utf8" });
  const loaded =
    res.status === 0 &&
    typeof res.stdout === "string" &&
    res.stdout
      .split("\n")
      .some((line) => line.trim().split(/\s+/).pop() === LABEL);
  return { installed, loaded, plistPath: PLIST_PATH };
}
|
package/src/dedupe/detector.ts
DELETED
|
@@ -1,197 +0,0 @@
|
|
|
1
|
-
import type { Config } from "../config.js";
|
|
2
|
-
import type { DistilledRow, StateDb } from "../state/db.js";
|
|
3
|
-
import {
|
|
4
|
-
buildAnthropicClient,
|
|
5
|
-
callLLM,
|
|
6
|
-
normalizeModelName,
|
|
7
|
-
withRateLimitRetry,
|
|
8
|
-
} from "../pipeline/distiller.js";
|
|
9
|
-
import { kebab } from "../pipeline/writer.js";
|
|
10
|
-
|
|
11
|
-
const MAX_CANDIDATE_PAIRS = 30;
|
|
12
|
-
const MIN_DUP_CONFIDENCE = 0.7;
|
|
13
|
-
const STOPWORDS = new Set([
|
|
14
|
-
"the",
|
|
15
|
-
"a",
|
|
16
|
-
"an",
|
|
17
|
-
"is",
|
|
18
|
-
"are",
|
|
19
|
-
"was",
|
|
20
|
-
"were",
|
|
21
|
-
"it",
|
|
22
|
-
"this",
|
|
23
|
-
"that",
|
|
24
|
-
"to",
|
|
25
|
-
"of",
|
|
26
|
-
"in",
|
|
27
|
-
"on",
|
|
28
|
-
"and",
|
|
29
|
-
"or",
|
|
30
|
-
"for",
|
|
31
|
-
"with",
|
|
32
|
-
"as",
|
|
33
|
-
"by",
|
|
34
|
-
]);
|
|
35
|
-
|
|
36
|
-
export interface DuplicatePair {
|
|
37
|
-
a: DistilledRow;
|
|
38
|
-
b: DistilledRow;
|
|
39
|
-
isDuplicate: true;
|
|
40
|
-
confidence: number;
|
|
41
|
-
reason: string;
|
|
42
|
-
keepWhich: "A" | "B" | "merge";
|
|
43
|
-
}
|
|
44
|
-
|
|
45
|
-
interface CandidatePair {
|
|
46
|
-
a: DistilledRow;
|
|
47
|
-
b: DistilledRow;
|
|
48
|
-
score: number;
|
|
49
|
-
}
|
|
50
|
-
|
|
51
|
-
/**
 * Scores every unordered pair of rows and returns the most promising
 * duplicate candidates.
 *
 * Pairs with a score of 0 or less are dropped; the rest are sorted by
 * descending score and truncated to `MAX_CANDIDATE_PAIRS`.
 *
 * @param rows Distilled notes to compare against each other.
 * @returns Candidate pairs, highest score first.
 */
export function findCandidatePairs(rows: DistilledRow[]): CandidatePair[] {
  const scored: CandidatePair[] = [];
  for (const [i, a] of rows.entries()) {
    if (!a) continue;
    for (let j = i + 1; j < rows.length; j += 1) {
      const b = rows[j];
      if (!b) continue;
      const score = scoreCandidate(a, b);
      if (score > 0) scored.push({ a, b, score });
    }
  }
  scored.sort((left, right) => right.score - left.score);
  return scored.slice(0, MAX_CANDIDATE_PAIRS);
}
|
|
66
|
-
|
|
67
|
-
/**
 * Heuristic similarity score for two notes; higher means more likely to be
 * duplicates, 0 means no signal at all.
 *
 * Signals used:
 * - +5 when project (kebab-cased) and category match and at least two topic
 *   tokens are shared;
 * - +4 when the Jaccard ratio of topic tokens exceeds 0.7;
 * - +1..3 for three or more shared significant words in the content heads;
 * - +0.5 per shared topic token as a tie-breaker.
 *
 * Topic tokens are the kebab-case slug parts of length >= 3, treated as
 * sets so a word repeated inside one topic is counted once and the Jaccard
 * ratio stays within 0..1.
 */
function scoreCandidate(a: DistilledRow, b: DistilledRow): number {
  const topicTokens = (topic: string): Set<string> =>
    new Set(
      kebab(topic)
        .split("-")
        .filter((t) => t.length >= 3),
    );
  const aTokens = topicTokens(a.topic);
  const bTokens = topicTokens(b.topic);

  let shared = 0;
  for (const t of aTokens) if (bTokens.has(t)) shared += 1;
  const union = aTokens.size + bTokens.size - shared;
  const jaccard = union === 0 ? 0 : shared / union;

  const sameProjCat =
    kebab(a.project) === kebab(b.project) && a.category === b.category;

  const aWords = significantWords(a.content);
  const bWords = significantWords(b.content);
  let sharedSig = 0;
  for (const w of aWords) if (bWords.has(w)) sharedSig += 1;

  let score = 0;
  if (sameProjCat && shared >= 2) score += 5;
  if (jaccard > 0.7) score += 4;
  if (sharedSig >= 3) score += Math.min(3, sharedSig - 2);

  // Tie-breaker: more shared structure ranks higher.
  score += shared * 0.5;
  return score;
}
|
|
95
|
-
|
|
96
|
-
/**
 * Extracts the distinct lower-cased words of length >= 3 from the first 100
 * characters of `content`, leaving out entries of `STOPWORDS`.
 */
function significantWords(content: string): Set<string> {
  const words = new Set<string>();
  for (const token of content.slice(0, 100).toLowerCase().split(/\W+/)) {
    if (token.length < 3 || STOPWORDS.has(token)) continue;
    words.add(token);
  }
  return words;
}
|
|
101
|
-
|
|
102
|
-
/**
 * Asks the classify model whether each heuristic candidate pair is a real
 * duplicate.
 *
 * Candidates come from `findCandidatePairs` over `db.listDistilled()`. A pair
 * is reported when the model answers `isDuplicate` with a confidence of at
 * least `MIN_DUP_CONFIDENCE`. A failing request for one pair does not abort
 * the run; it is counted in `failed` so callers can tell "no duplicates"
 * apart from "nothing could be checked".
 *
 * @param cfg Configuration (provider, models, credentials).
 * @param db State database providing the distilled notes.
 * @returns `checked` = number of candidate pairs attempted, `duplicates` =
 *   confirmed pairs, `failed` = pairs whose model call threw.
 */
export async function detectDuplicates(
  cfg: Config,
  db: StateDb,
): Promise<{ checked: number; duplicates: DuplicatePair[]; failed: number }> {
  const rows = db.listDistilled();
  const pairs = findCandidatePairs(rows);

  if (pairs.length === 0) return { checked: 0, duplicates: [], failed: 0 };

  const client = buildAnthropicClient(cfg);
  const model = normalizeModelName(cfg.models.classify, cfg.provider);
  const duplicates: DuplicatePair[] = [];
  let failed = 0;

  for (const pair of pairs) {
    const prompt = `Are these two knowledge notes duplicates or near-duplicates?
They are duplicates if they teach the same lesson or describe the same pattern/gotcha, even if worded differently.

Answer JSON only:
{
"isDuplicate": boolean,
"confidence": number (0..1),
"reason": "string (max 20 words)",
"keepWhich": "A" | "B" | "merge"
}

keepWhich: "A" if A is more detailed/recent, "B" if B is, "merge" if both have unique value worth combining.

Note A (topic: ${pair.a.topic}, date: ${pair.a.startedAt ?? "?"}, confidence: ${pair.a.confidence}):
${headOf(pair.a.content, 400)}

Note B (topic: ${pair.b.topic}, date: ${pair.b.startedAt ?? "?"}, confidence: ${pair.b.confidence}):
${headOf(pair.b.content, 400)}`;

    try {
      const text = await withRateLimitRetry(() =>
        callLLM(cfg, client, { prompt, model, maxTokens: 250 }),
      );
      const parsed = parseResponse(text);
      if (parsed.isDuplicate && parsed.confidence >= MIN_DUP_CONFIDENCE) {
        duplicates.push({
          a: pair.a,
          b: pair.b,
          isDuplicate: true,
          confidence: parsed.confidence,
          reason: parsed.reason,
          keepWhich: parsed.keepWhich,
        });
      }
    } catch {
      // A single failure shouldn't kill the run, but it must stay visible.
      failed += 1;
    }
  }

  return { checked: pairs.length, duplicates, failed };
}
|
|
157
|
-
|
|
158
|
-
/**
 * Collapses every whitespace run in `s` to a single space, trims the ends,
 * and returns at most the first `n` characters of the result.
 */
function headOf(s: string, n: number): string {
  const collapsed = s.replace(/\s+/g, " ");
  return collapsed.trim().slice(0, n);
}
|
|
161
|
-
|
|
162
|
-
/**
 * Extracts the duplicate verdict from a model reply.
 *
 * The span from the first `{` to the last `}` in `text` is parsed as JSON.
 * Missing or malformed data degrades to the safe fallback (not a duplicate,
 * confidence 0, empty reason, `keepWhich: "merge"`).
 *
 * `confidence` is accepted only when it is a number or a non-empty numeric
 * string; other JSON types (booleans, arrays, objects) count as 0 rather
 * than being coerced, so a malformed reply cannot pass the duplicate
 * threshold by accident. The value is then clamped to 0..1.
 *
 * @param text Raw model output.
 * @returns The normalized verdict.
 */
function parseResponse(text: string): {
  isDuplicate: boolean;
  confidence: number;
  reason: string;
  keepWhich: "A" | "B" | "merge";
} {
  const fallback = {
    isDuplicate: false,
    confidence: 0,
    reason: "",
    keepWhich: "merge" as const,
  };
  const match = text.match(/\{[\s\S]*\}/);
  if (!match) return fallback;
  try {
    const obj = JSON.parse(match[0]) as Record<string, unknown>;

    const rawKeep = obj.keepWhich;
    const keepWhich: "A" | "B" | "merge" =
      rawKeep === "A" || rawKeep === "B" || rawKeep === "merge"
        ? rawKeep
        : "merge";

    const rawConfidence = obj.confidence;
    let confidence = 0;
    if (typeof rawConfidence === "number") {
      confidence = rawConfidence;
    } else if (
      typeof rawConfidence === "string" &&
      rawConfidence.trim() !== ""
    ) {
      confidence = Number(rawConfidence);
    }

    return {
      isDuplicate: obj.isDuplicate === true,
      confidence: clamp01(confidence),
      reason: typeof obj.reason === "string" ? obj.reason : "",
      keepWhich,
    };
  } catch {
    return fallback;
  }
}
|
|
193
|
-
|
|
194
|
-
/** Clamps `n` to the range 0..1; NaN and infinite values become 0. */
function clamp01(n: number): number {
  return Number.isFinite(n) ? Math.min(1, Math.max(0, n)) : 0;
}
|
package/src/dedupe/merger.ts
DELETED
|
@@ -1,172 +0,0 @@
|
|
|
1
|
-
import {
|
|
2
|
-
appendFileSync,
|
|
3
|
-
existsSync,
|
|
4
|
-
mkdirSync,
|
|
5
|
-
readFileSync,
|
|
6
|
-
renameSync,
|
|
7
|
-
writeFileSync,
|
|
8
|
-
} from "node:fs";
|
|
9
|
-
import { basename, join } from "node:path";
|
|
10
|
-
import type { Config } from "../config.js";
|
|
11
|
-
import type { DistilledRow, StateDb } from "../state/db.js";
|
|
12
|
-
import {
|
|
13
|
-
buildAnthropicClient,
|
|
14
|
-
callLLM,
|
|
15
|
-
normalizeModelName,
|
|
16
|
-
withRateLimitRetry,
|
|
17
|
-
} from "../pipeline/distiller.js";
|
|
18
|
-
import { kebab } from "../pipeline/writer.js";
|
|
19
|
-
|
|
20
|
-
const CATEGORY_DIRS: Record<string, string> = {
|
|
21
|
-
pattern: "patterns",
|
|
22
|
-
gotcha: "gotchas",
|
|
23
|
-
decision: "decisions",
|
|
24
|
-
tool: "tools",
|
|
25
|
-
};
|
|
26
|
-
|
|
27
|
-
export interface MergeOutcome {
|
|
28
|
-
winnerPath: string;
|
|
29
|
-
archivedPath: string;
|
|
30
|
-
action: "keep-a" | "keep-b" | "merge";
|
|
31
|
-
}
|
|
32
|
-
|
|
33
|
-
/**
 * Resolves a confirmed duplicate pair according to `keepWhich`.
 *
 * - `"merge"`: delegates to `doMerge`, which combines both notes.
 * - `"A"` / `"B"`: keeps that note, moves the other note's file into the
 *   `archived` folder under the output root, appends a link to it in the
 *   kept note and marks the other row archived in the database.
 *
 * @param cfg Configuration; `vaultPath` and `outputDir` define the root.
 * @param db State database.
 * @param a First note of the pair.
 * @param b Second note of the pair.
 * @param keepWhich Resolution strategy.
 * @returns Paths of the kept and archived files and the action taken.
 */
export async function mergeNotes(
  cfg: Config,
  db: StateDb,
  a: DistilledRow,
  b: DistilledRow,
  keepWhich: "A" | "B" | "merge",
): Promise<MergeOutcome> {
  const root = join(cfg.vaultPath, cfg.outputDir);
  const archivedDir = join(root, "archived");
  if (!existsSync(archivedDir)) mkdirSync(archivedDir, { recursive: true });

  const aFile = resolveNotePath(root, a);
  const bFile = resolveNotePath(root, b);

  if (keepWhich === "merge") {
    return doMerge(cfg, db, root, archivedDir, a, b, aFile, bFile);
  }

  const kept =
    keepWhich === "A"
      ? { winnerFile: aFile, loserFile: bFile, loser: b, action: "keep-a" as const }
      : { winnerFile: bFile, loserFile: aFile, loser: a, action: "keep-b" as const };

  const archivedPath = archiveFile(archivedDir, kept.loserFile);
  appendArchivedSection(kept.winnerFile, archivedPath);
  db.archive(kept.loser.path);

  return { winnerPath: kept.winnerFile, archivedPath, action: kept.action };
}
|
|
65
|
-
|
|
66
|
-
/**
 * Combines two duplicate notes into one using the distill model.
 *
 * The row with the higher confidence keeps its file path and database entry
 * (ties go to A). Its body is replaced by the merged text, the other note's
 * file is moved to `archivedDir`, a link to the archived file is appended to
 * the winner, and the other row is marked archived.
 *
 * @throws Error when the model returns an empty body. Nothing has been
 *   written or archived at that point, so both notes stay untouched.
 *   Errors from the model call itself also propagate.
 */
async function doMerge(
  cfg: Config,
  db: StateDb,
  root: string,
  archivedDir: string,
  a: DistilledRow,
  b: DistilledRow,
  aFile: string,
  bFile: string,
): Promise<MergeOutcome> {
  // Higher-confidence row wins the file path (and DB content); tie → A.
  const aWins = a.confidence >= b.confidence;
  const winner = aWins ? a : b;
  const loser = aWins ? b : a;
  const winnerFile = aWins ? aFile : bFile;
  const loserFile = aWins ? bFile : aFile;

  const prompt = `Merge these two knowledge notes into one superior note.
Keep all unique insights from both. Use the better structure.
Output only the merged markdown body, no frontmatter.

Note A:
${a.content}

Note B:
${b.content}`;

  const client = buildAnthropicClient(cfg);
  const model = normalizeModelName(cfg.models.distill, cfg.provider);
  const merged = (
    await withRateLimitRetry(() =>
      callLLM(cfg, client, { prompt, model, maxTokens: 2000 }),
    )
  ).trim();

  // An empty reply would wipe the winner's body and then archive the only
  // other copy of the content; refuse before touching anything.
  if (merged.length === 0) {
    throw new Error("Merge aborted: model returned an empty note body");
  }

  rewriteWinnerBody(winnerFile, merged);
  db.updateContent(winner.path, merged);

  const archivedPath = archiveFile(archivedDir, loserFile);
  appendArchivedSection(winnerFile, archivedPath);
  db.archive(loser.path);
  // `root` is part of the signature but not needed here.
  void root;

  return { winnerPath: winnerFile, archivedPath, action: "merge" };
}
|
|
111
|
-
|
|
112
|
-
/**
 * Builds the expected on-disk path of a note:
 * `<root>/<category dir>/<kebab topic>-<first 8 chars of sessionId>.md`.
 * Categories without an entry in `CATEGORY_DIRS` use `<category>s`.
 */
function resolveNotePath(root: string, r: DistilledRow): string {
  const categoryDir = CATEGORY_DIRS[r.category] ?? `${r.category}s`;
  const fileName = `${kebab(r.topic)}-${r.sessionId.slice(0, 8)}.md`;
  return join(root, categoryDir, fileName);
}
|
|
118
|
-
|
|
119
|
-
/**
 * Moves `sourcePath` into `archivedDir`, keeping its file name and choosing
 * a non-colliding name via `uniquePath`.
 *
 * @returns The destination path. When the source file does not exist,
 *   nothing is moved and the path it would have been given is returned.
 */
function archiveFile(archivedDir: string, sourcePath: string): string {
  const target = join(archivedDir, basename(sourcePath));
  // Nothing to move on disk; still report what the target would have been.
  if (!existsSync(sourcePath)) return target;

  const dest = uniquePath(target);
  renameSync(sourcePath, dest);
  return dest;
}
|
|
128
|
-
|
|
129
|
-
/**
 * Returns `p` when nothing exists there, otherwise the first free variant
 * `<base>-<n><ext>` with n = 1, 2, 3, ...
 *
 * The extension is looked for only in the final path segment, so a dot in a
 * directory name is never mistaken for the start of the extension. A leading
 * dot of the file name (dotfile) does not count as an extension either.
 *
 * @param p Desired path.
 * @returns A path that did not exist at the time of the check.
 */
function uniquePath(p: string): string {
  if (!existsSync(p)) return p;

  const lastSep = Math.max(p.lastIndexOf("/"), p.lastIndexOf("\\"));
  const dot = p.lastIndexOf(".");
  const hasExt = dot > lastSep + 1;
  const base = hasExt ? p.slice(0, dot) : p;
  const ext = hasExt ? p.slice(dot) : "";

  let i = 1;
  while (existsSync(`${base}-${i}${ext}`)) i += 1;
  return `${base}-${i}${ext}`;
}
|
|
138
|
-
|
|
139
|
-
/**
 * Records an archived duplicate in the winner note as a wiki link
 * (`- [[<archived file name without .md>]]`) under the
 * `## Archived Duplicates` heading.
 *
 * When the heading already exists as a line of its own, the link is inserted
 * after the last non-blank line of that section (the section ends at the
 * next line starting with `##` or at the end of the file). Otherwise a new
 * section is appended to the file. Does nothing when the winner file does
 * not exist.
 *
 * @param winnerFile Note that was kept.
 * @param archivedPath Path the duplicate was archived to.
 */
function appendArchivedSection(winnerFile: string, archivedPath: string): void {
  if (!existsSync(winnerFile)) return;

  const heading = "## Archived Duplicates";
  const link = `- [[${basename(archivedPath, ".md")}]]`;

  const lines = readFileSync(winnerFile, "utf8").split("\n");
  // trimEnd tolerates trailing spaces and the "\r" of CRLF files.
  const headingIdx = lines.findIndex((line) => line.trimEnd() === heading);
  if (headingIdx === -1) {
    appendFileSync(winnerFile, `\n${heading}\n${link}\n`);
    return;
  }

  // The section runs until the next heading or the end of the file.
  let sectionEnd = lines.length;
  for (let i = headingIdx + 1; i < lines.length; i += 1) {
    if (lines[i]?.startsWith("##")) {
      sectionEnd = i;
      break;
    }
  }

  // Insert before the section's trailing blank lines so the existing
  // spacing is preserved and does not grow with every append.
  let insertAt = sectionEnd;
  while (
    insertAt > headingIdx + 1 &&
    (lines[insertAt - 1] ?? "").trim() === ""
  ) {
    insertAt -= 1;
  }
  lines.splice(insertAt, 0, link);

  const updated = lines.join("\n");
  writeFileSync(winnerFile, updated.endsWith("\n") ? updated : `${updated}\n`);
}
|
|
157
|
-
|
|
158
|
-
/**
 * Replaces the body of a note (everything after the YAML frontmatter) with
 * `mergedBody`, preserving the original frontmatter block.
 *
 * The frontmatter is the leading block delimited by `---` lines; LF and CRLF
 * line endings and an empty block are recognized. When the file does not
 * exist or has no frontmatter, the file is written with the merged body
 * only.
 *
 * @param winnerFile Note file to rewrite.
 * @param mergedBody New markdown body, without frontmatter.
 */
function rewriteWinnerBody(winnerFile: string, mergedBody: string): void {
  const bodyOnly = `${mergedBody}\n`;
  if (!existsSync(winnerFile)) {
    writeFileSync(winnerFile, bodyOnly);
    return;
  }

  const current = readFileSync(winnerFile, "utf8");
  const fmMatch = current.match(
    /^---[ \t]*\r?\n(?:[\s\S]*?\r?\n)?---[ \t]*(?:\r?\n|$)/,
  );
  if (!fmMatch) {
    writeFileSync(winnerFile, bodyOnly);
    return;
  }

  const matched = fmMatch[0] ?? "";
  // The closing `---` may be the last line of the file with no newline.
  const frontmatter = matched.endsWith("\n") ? matched : `${matched}\n`;
  writeFileSync(winnerFile, `${frontmatter}\n${mergedBody}\n`);
}
|