@djolex999/vir-cli 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. package/CLAUDE.md +149 -0
  2. package/LICENSE +21 -0
  3. package/README.md +155 -0
  4. package/dist/claude/updater.js +230 -0
  5. package/dist/claude/updater.js.map +1 -0
  6. package/dist/cli.js +779 -0
  7. package/dist/cli.js.map +1 -0
  8. package/dist/config.js +82 -0
  9. package/dist/config.js.map +1 -0
  10. package/dist/daemon/launchd.js +93 -0
  11. package/dist/daemon/launchd.js.map +1 -0
  12. package/dist/dedupe/detector.js +159 -0
  13. package/dist/dedupe/detector.js.map +1 -0
  14. package/dist/dedupe/merger.js +116 -0
  15. package/dist/dedupe/merger.js.map +1 -0
  16. package/dist/lint/linter.js +224 -0
  17. package/dist/lint/linter.js.map +1 -0
  18. package/dist/pipeline/distiller.js +208 -0
  19. package/dist/pipeline/distiller.js.map +1 -0
  20. package/dist/pipeline/filter.js +28 -0
  21. package/dist/pipeline/filter.js.map +1 -0
  22. package/dist/pipeline/parser.js +109 -0
  23. package/dist/pipeline/parser.js.map +1 -0
  24. package/dist/pipeline/run.js +312 -0
  25. package/dist/pipeline/run.js.map +1 -0
  26. package/dist/pipeline/scanner.js +47 -0
  27. package/dist/pipeline/scanner.js.map +1 -0
  28. package/dist/pipeline/scrubber.js +51 -0
  29. package/dist/pipeline/scrubber.js.map +1 -0
  30. package/dist/pipeline/summarizer.js +162 -0
  31. package/dist/pipeline/summarizer.js.map +1 -0
  32. package/dist/pipeline/types.js +2 -0
  33. package/dist/pipeline/types.js.map +1 -0
  34. package/dist/pipeline/writer.js +195 -0
  35. package/dist/pipeline/writer.js.map +1 -0
  36. package/dist/search/embedder.js +93 -0
  37. package/dist/search/embedder.js.map +1 -0
  38. package/dist/search/retriever.js +212 -0
  39. package/dist/search/retriever.js.map +1 -0
  40. package/dist/search/synthesizer.js +26 -0
  41. package/dist/search/synthesizer.js.map +1 -0
  42. package/dist/state/db.js +309 -0
  43. package/dist/state/db.js.map +1 -0
  44. package/dist/ui/display.js +148 -0
  45. package/dist/ui/display.js.map +1 -0
  46. package/package.json +50 -0
  47. package/src/claude/updater.ts +273 -0
  48. package/src/cli.ts +953 -0
  49. package/src/config.ts +89 -0
  50. package/src/daemon/launchd.ts +115 -0
  51. package/src/dedupe/detector.ts +197 -0
  52. package/src/dedupe/merger.ts +172 -0
  53. package/src/lint/linter.ts +286 -0
  54. package/src/pipeline/distiller.ts +280 -0
  55. package/src/pipeline/filter.ts +43 -0
  56. package/src/pipeline/parser.ts +118 -0
  57. package/src/pipeline/run.ts +378 -0
  58. package/src/pipeline/scanner.ts +51 -0
  59. package/src/pipeline/scrubber.ts +55 -0
  60. package/src/pipeline/summarizer.ts +204 -0
  61. package/src/pipeline/types.ts +41 -0
  62. package/src/pipeline/writer.ts +242 -0
  63. package/src/search/embedder.ts +88 -0
  64. package/src/search/retriever.ts +255 -0
  65. package/src/search/synthesizer.ts +45 -0
  66. package/src/state/db.ts +451 -0
  67. package/src/ui/display.ts +184 -0
  68. package/tsconfig.json +23 -0
  69. package/vir-flow.html +708 -0
@@ -0,0 +1,255 @@
1
+ import { existsSync, readFileSync, readdirSync, statSync } from "node:fs";
2
+ import { join, relative } from "node:path";
3
+ import type { Config } from "../config.js";
4
+ import type { StateDb } from "../state/db.js";
5
+ import {
6
+ cosineSimilarity,
7
+ embed,
8
+ isOllamaAvailable,
9
+ } from "./embedder.js";
10
+
11
+ const SKIP_FILES = new Set(["index.md", "log.md"]); // Base names that loadIndex leaves out of the TF-IDF index.
12
+
13
+ export interface IndexedDoc { // One markdown file prepared for TF-IDF scoring (produced by loadIndex).
14
+ relPath: string; // File path relative to the vault output root.
15
+ title: string; // relPath with a trailing ".md" removed.
16
+ raw: string; // File contents exactly as read from disk.
17
+ text: string; // raw after stripMarkdown.
18
+ tokens: string[]; // tokenize(text); its length is used to normalize term frequency.
19
+ tf: Map<string, number>; // Occurrence count of each token in tokens.
20
+ }
21
+
22
+ export interface ScoredDoc { // A document ranked by searchTfIdf.
23
+ relPath: string; // Copied from the IndexedDoc.
24
+ title: string; // Copied from the IndexedDoc.
25
+ raw: string; // Copied from the IndexedDoc.
26
+ score: number; // Summed TF-IDF score, rounded to 4 decimal places.
27
+ }
28
+
29
+ export interface SearchHit { // A search result as returned by search().
30
+ filePath: string; // Stored embedding row path, or vault root joined with relPath for TF-IDF hits.
31
+ title: string; // Root-relative path with a trailing ".md" removed.
32
+ content: string; // Full file contents of the matched note.
33
+ score: number; // Cosine similarity or TF-IDF score, rounded to 4 decimal places.
34
+ method: "embedding" | "tfidf"; // Which ranking path produced this hit.
35
+ }
36
+
37
+ const MIN_EMBEDDING_SCORE = 0.3; // Cosine-similarity floor: searchByEmbedding drops rows scoring below this.
38
+
39
/**
 * Search the vault for notes matching `query`.
 *
 * Embedding similarity is tried first when Ollama is reachable; its hits are
 * returned if there is at least one. In every other case (Ollama unavailable,
 * or no embedding hit at all) the lexical TF-IDF ranking is used instead,
 * since word overlap may still find a match when cosine similarity does not.
 *
 * @param cfg - Configuration used to locate the vault output directory.
 * @param db - State database the stored embeddings are read from.
 * @param query - Free-text query.
 * @param topK - Maximum number of hits to return (default 8).
 * @returns Hits tagged with the method that produced them.
 */
export async function search(
  cfg: Config,
  db: StateDb,
  query: string,
  topK = 8,
): Promise<SearchHit[]> {
  const ollamaUp = await isOllamaAvailable();
  if (!ollamaUp) return searchByTfIdf(cfg, query, topK);

  const embeddingHits = await searchByEmbedding(cfg, db, query, topK);
  return embeddingHits.length > 0
    ? embeddingHits
    : searchByTfIdf(cfg, query, topK);
}
54
+
55
/**
 * Rank stored note embeddings against the query by cosine similarity.
 *
 * Rows scoring below MIN_EMBEDDING_SCORE are discarded. The remaining rows are
 * visited best-first and turned into hits until `topK` hits are collected.
 * Rows whose file is missing, unreadable or empty are skipped WITHOUT counting
 * against `topK`, so stale embedding rows no longer shrink the result set when
 * further valid candidates exist.
 *
 * @param cfg - Configuration used to locate the vault output directory.
 * @param db - State database providing `getEmbeddings(root)`.
 * @param query - Free-text query to embed.
 * @param topK - Maximum number of hits; a non-positive value yields no hits.
 * @returns Hits in descending score order; empty when embedding the query
 *   fails, no embeddings are stored, or nothing reaches the score floor.
 */
async function searchByEmbedding(
  cfg: Config,
  db: StateDb,
  query: string,
  topK: number,
): Promise<SearchHit[]> {
  let queryVec: number[];
  try {
    queryVec = await embed(query);
  } catch {
    // Best effort: an embedding failure yields no hits so the caller can
    // fall back to TF-IDF.
    return [];
  }

  const root = vaultRoot(cfg);
  const rows = db.getEmbeddings(root);
  if (rows.length === 0) return [];

  const scored: Array<{ row: (typeof rows)[number]; score: number }> = [];
  for (const r of rows) {
    const s = cosineSimilarity(queryVec, r.embedding);
    if (s >= MIN_EMBEDDING_SCORE) scored.push({ row: r, score: s });
  }
  scored.sort((a, b) => b.score - a.score);

  const hits: SearchHit[] = [];
  for (const { row, score } of scored) {
    // Stop once enough hits are collected; checking here (instead of slicing
    // up front) lets later candidates replace rows whose file is gone.
    if (hits.length >= topK) break;

    let content = "";
    try {
      content = existsSync(row.filePath)
        ? readFileSync(row.filePath, "utf8")
        : "";
    } catch {
      content = "";
    }
    if (content.length === 0) continue;

    const rel = relative(root, row.filePath);
    hits.push({
      filePath: row.filePath,
      title: rel.replace(/\.md$/, ""),
      content,
      score: Math.round(score * 10000) / 10000,
      method: "embedding",
    });
  }
  return hits;
}
99
+
100
/**
 * Lexical search path: index the vault, rank it with TF-IDF and convert the
 * ranked documents into SearchHit records.
 *
 * @param cfg - Configuration used to locate the vault output directory.
 * @param query - Free-text query.
 * @param topK - Maximum number of hits to return.
 * @returns Hits tagged with method "tfidf", in ranking order.
 */
function searchByTfIdf(cfg: Config, query: string, topK: number): SearchHit[] {
  const root = vaultRoot(cfg);
  const ranked = searchTfIdf(loadIndex(cfg), query, topK);

  const hits: SearchHit[] = [];
  for (const doc of ranked) {
    hits.push({
      filePath: join(root, doc.relPath),
      title: doc.title,
      content: doc.raw,
      score: doc.score,
      method: "tfidf",
    });
  }
  return hits;
}
112
+
113
/**
 * Resolve the directory that holds the generated notes.
 *
 * @param cfg - Configuration providing `vaultPath` and `outputDir`.
 * @returns `outputDir` joined onto `vaultPath`.
 */
export function vaultRoot(cfg: Config): string {
  const { vaultPath, outputDir } = cfg;
  return join(vaultPath, outputDir);
}
116
+
117
/**
 * Build the in-memory TF-IDF index for every markdown file under the vault
 * output root.
 *
 * Files whose base name is listed in SKIP_FILES are left out, as are files
 * that cannot be read.
 *
 * @param cfg - Configuration used to locate the vault output directory.
 * @returns One IndexedDoc per indexed file, in directory-walk order.
 */
export function loadIndex(cfg: Config): IndexedDoc[] {
  const root = vaultRoot(cfg);
  const files: string[] = [];
  walk(root, files);

  const docs: IndexedDoc[] = [];
  for (const full of files) {
    const rel = relative(root, full);
    // path.relative() returns platform separators, so split on both "/" and
    // "\" — otherwise nested index.md / log.md files are not skipped on Windows.
    const base = rel.split(/[\\/]/).pop() ?? rel;
    if (SKIP_FILES.has(base)) continue;

    let raw: string;
    try {
      raw = readFileSync(full, "utf8");
    } catch {
      // Unreadable file: leave it out of the index rather than abort.
      continue;
    }

    const text = stripMarkdown(raw);
    const tokens = tokenize(text);
    const tf = termFrequency(tokens);
    docs.push({
      relPath: rel,
      title: rel.replace(/\.md$/, ""),
      raw,
      text,
      tokens,
      tf,
    });
  }
  return docs;
}
146
+
147
/**
 * Rank pre-indexed documents against a query with TF-IDF.
 *
 * Term frequency is normalized by document length so long documents do not
 * dominate. Inverse document frequency is smoothed as `log(1 + N / df)`:
 * the unsmoothed `log(N / df)` is 0 whenever a term occurs in every document,
 * which made a single-note vault (or a term present in all notes) impossible
 * to match lexically.
 *
 * @param docs - Documents produced by loadIndex.
 * @param query - Free-text query; tokenized and de-duplicated before scoring.
 * @param topK - Maximum number of results (default 8).
 * @returns Documents with a positive score, best first, scores rounded to
 *   4 decimal places. Empty when there are no documents or no query tokens.
 */
export function searchTfIdf(
  docs: IndexedDoc[],
  query: string,
  topK = 8,
): ScoredDoc[] {
  if (docs.length === 0) return [];
  const queryTokens = uniq(tokenize(query));
  if (queryTokens.length === 0) return [];

  const totalDocs = docs.length;

  // Smoothed IDF per query term; terms that occur in no document get no entry.
  const idfMap = new Map<string, number>();
  for (const term of queryTokens) {
    let df = 0;
    for (const d of docs) if (d.tf.has(term)) df += 1;
    if (df > 0) idfMap.set(term, Math.log(1 + totalDocs / df));
  }

  const scored: ScoredDoc[] = [];
  for (const d of docs) {
    // Guard against empty documents to avoid dividing by zero.
    const docLength = Math.max(1, d.tokens.length);
    let score = 0;
    for (const term of queryTokens) {
      const tf = d.tf.get(term) ?? 0;
      if (tf === 0) continue;
      const idf = idfMap.get(term);
      if (idf === undefined) continue;
      score += (tf / docLength) * idf;
    }
    if (score > 0) {
      scored.push({
        relPath: d.relPath,
        title: d.title,
        raw: d.raw,
        score: Math.round(score * 10000) / 10000,
      });
    }
  }

  scored.sort((a, b) => b.score - a.score);
  return scored.slice(0, topK);
}
190
+
191
/**
 * Recursively collect the paths of all `.md` files under `dir`.
 *
 * Directories that cannot be listed and entries that cannot be stat'ed are
 * skipped silently.
 *
 * @param dir - Directory to scan.
 * @param acc - Output array; matching file paths are appended to it.
 */
function walk(dir: string, acc: string[]): void {
  let names: string[];
  try {
    names = readdirSync(dir);
  } catch {
    return;
  }

  names.forEach((name) => {
    const child = join(dir, name);
    let info;
    try {
      info = statSync(child);
    } catch {
      return;
    }
    if (info.isDirectory()) {
      walk(child, acc);
      return;
    }
    if (info.isFile() && name.endsWith(".md")) acc.push(child);
  });
}
210
+
211
/**
 * Reduce markdown to plain text suitable for tokenization.
 *
 * Removes YAML front matter, fenced and inline code, and horizontal rules;
 * unwraps images, links and wikilinks to their text; strips heading,
 * blockquote and list markers as well as emphasis markers.
 *
 * Underscore emphasis is only unwrapped when the underscores are not adjacent
 * to a word character, so identifiers such as `max_tokens` keep their
 * underscores and still match the same token in a query. Front matter is
 * recognized with either LF or CRLF line endings.
 *
 * @param md - Raw markdown source.
 * @returns The text with markdown syntax removed.
 */
export function stripMarkdown(md: string): string {
  let out = md;
  // YAML frontmatter (LF or CRLF line endings)
  out = out.replace(/^---\r?\n[\s\S]*?\r?\n---\r?\n?/, "");
  // Fenced code blocks
  out = out.replace(/```[\s\S]*?```/g, " ");
  // Inline code
  out = out.replace(/`[^`]*`/g, " ");
  // Images -> alt text
  out = out.replace(/!\[([^\]]*)\]\([^)]+\)/g, "$1");
  // Markdown links -> link text
  out = out.replace(/\[([^\]]+)\]\([^)]+\)/g, "$1");
  // Wikilinks -> inner
  out = out.replace(/\[\[([^\]]+)\]\]/g, "$1");
  // Headings, blockquotes, list markers
  out = out.replace(/^\s{0,3}#{1,6}\s+/gm, "");
  out = out.replace(/^\s*>\s?/gm, "");
  out = out.replace(/^\s*[-*+]\s+/gm, "");
  out = out.replace(/^\s*\d+\.\s+/gm, "");
  // Emphasis markers. The underscore forms require a non-word character (or
  // the string edge) on both sides so snake_case identifiers are left intact.
  out = out.replace(/\*\*([^*]+)\*\*/g, "$1");
  out = out.replace(/(?<!\w)__([^_]+)__(?!\w)/g, "$1");
  out = out.replace(/\*([^*]+)\*/g, "$1");
  out = out.replace(/(?<!\w)_([^_]+)_(?!\w)/g, "$1");
  // Horizontal rules
  out = out.replace(/^\s*[-*_]{3,}\s*$/gm, "");
  return out;
}
239
+
240
/**
 * Split text into lowercase search tokens.
 *
 * Tokens are maximal runs of Unicode letters, combining marks, digits and
 * underscores. The previous ASCII-only `\W+` separator split accented words
 * ("café" became "caf") and dropped non-Latin text entirely; ASCII input
 * tokenizes exactly as before.
 *
 * @param s - Text to tokenize.
 * @returns Tokens of at least 3 UTF-16 code units, in order of appearance.
 */
export function tokenize(s: string): string[] {
  return s
    .toLowerCase()
    .split(/[^\p{L}\p{M}\p{N}_]+/u)
    .filter((t) => t.length >= 3);
}
246
+
247
/**
 * Count how many times each token occurs.
 *
 * @param tokens - Token list, duplicates allowed.
 * @returns Map from token to its number of occurrences, keyed in order of
 *   first appearance.
 */
function termFrequency(tokens: string[]): Map<string, number> {
  const counts = new Map<string, number>();
  tokens.forEach((token) => {
    const seen = counts.get(token);
    counts.set(token, seen === undefined ? 1 : seen + 1);
  });
  return counts;
}
252
+
253
/**
 * Remove duplicate values while keeping the order of first occurrence.
 *
 * @param arr - Input values.
 * @returns A new array with each distinct value once.
 */
function uniq<T>(arr: T[]): T[] {
  const seen = new Set<T>();
  const distinct: T[] = [];
  for (const item of arr) {
    if (seen.has(item)) continue;
    seen.add(item);
    distinct.push(item);
  }
  return distinct;
}
@@ -0,0 +1,45 @@
1
+ import type { Config } from "../config.js";
2
+ import {
3
+ buildAnthropicClient,
4
+ callLLM,
5
+ normalizeModelName,
6
+ withRateLimitRetry,
7
+ } from "../pipeline/distiller.js";
8
+ import type { SearchHit } from "./retriever.js";
9
+
10
/**
 * Ask the configured LLM to answer `query` using only the retrieved notes.
 *
 * Each hit is rendered as a "### title (score)" section followed by its
 * trimmed content; sections are separated by horizontal rules and embedded in
 * a fixed instruction prompt. The call goes through withRateLimitRetry.
 *
 * @param cfg - Configuration providing the provider and the distill model.
 * @param query - The user's question.
 * @param hits - Search results to use as source material.
 * @returns The text returned by the model.
 */
export async function synthesize(
  cfg: Config,
  query: string,
  hits: SearchHit[],
): Promise<string> {
  const sections: string[] = [];
  for (const hit of hits) {
    sections.push(
      `### ${hit.title} (score: ${hit.score})\n${hit.content.trim()}`,
    );
  }
  const notes = sections.join("\n\n---\n\n");

  const prompt = `You are searching a personal knowledge base of distilled Claude Code session notes. Answer the query directly and concisely using only the provided notes as source.

Query: ${query}

Notes:
${notes}

Instructions:
- Answer directly, 3-5 sentences max
- Quote the specific note title when citing
- If notes don't contain relevant info, say so clearly
- Do not invent information not present in the notes`;

  const client = buildAnthropicClient(cfg);
  const model = normalizeModelName(cfg.models.distill, cfg.provider);
  const request = { prompt, model, maxTokens: 600 };

  return withRateLimitRetry(() => callLLM(cfg, client, request));
}