gitmem-mcp 1.4.4 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. package/CHANGELOG.md +27 -0
  2. package/README.md +21 -4
  3. package/bin/gitmem.js +10 -0
  4. package/dist/commands/activate.d.ts +20 -0
  5. package/dist/commands/activate.js +562 -0
  6. package/dist/commands/deactivate.d.ts +10 -0
  7. package/dist/commands/deactivate.js +95 -0
  8. package/dist/commands/migrate-local.d.ts +53 -0
  9. package/dist/commands/migrate-local.js +177 -0
  10. package/dist/hooks/format-utils.js +4 -0
  11. package/dist/schemas/log.d.ts +2 -2
  12. package/dist/schemas/search.d.ts +2 -2
  13. package/dist/schemas/session-close.d.ts +12 -12
  14. package/dist/server.js +33 -2
  15. package/dist/services/analytics.d.ts +22 -0
  16. package/dist/services/analytics.js +68 -0
  17. package/dist/services/doc-chunker.d.ts +45 -0
  18. package/dist/services/doc-chunker.js +208 -0
  19. package/dist/services/doc-index.d.ts +88 -0
  20. package/dist/services/doc-index.js +328 -0
  21. package/dist/services/license.d.ts +57 -0
  22. package/dist/services/license.js +200 -0
  23. package/dist/services/supabase-client.d.ts +6 -0
  24. package/dist/services/supabase-client.js +75 -22
  25. package/dist/services/tier.d.ts +13 -3
  26. package/dist/services/tier.js +38 -7
  27. package/dist/tools/definitions.d.ts +688 -0
  28. package/dist/tools/definitions.js +87 -0
  29. package/dist/tools/index-docs.d.ts +30 -0
  30. package/dist/tools/index-docs.js +163 -0
  31. package/dist/tools/prepare-context.js +7 -0
  32. package/dist/tools/recall.js +25 -4
  33. package/dist/tools/search-docs.d.ts +38 -0
  34. package/dist/tools/search-docs.js +94 -0
  35. package/dist/tools/search.js +11 -1
  36. package/dist/tools/session-close.js +76 -7
  37. package/dist/tools/session-start.js +57 -5
  38. package/package.json +1 -1
  39. package/schema/setup.sql +489 -25
@@ -0,0 +1,45 @@
1
+ /**
2
+ * Document Chunker — Split markdown files into searchable chunks
3
+ *
4
+ * Strategy:
5
+ * 1. Split on H2 headers first (natural semantic boundaries)
6
+ * 2. If a section exceeds target size, split on paragraph boundaries
7
+ * 3. Each chunk carries metadata: file path, title, category, chunk index
8
+ *
9
+ * Target chunk size: 500-800 tokens (~2000-3200 chars)
10
+ */
11
+ export interface DocChunk { // One searchable segment of a markdown file, as produced by chunkDocument
12
+ file_path: string; // Copied from DocFile.relative_path of the source file
13
+ chunk_index: number; // Zero-based position of this chunk within its file
14
+ title: string; // Document title: first H1 heading, or the file name when there is none
15
+ section_title: string; // Text of the H2 heading this chunk falls under; "" when there is none
16
+ category: string; // First segment of the relative path, or "root" for top-level files
17
+ content: string; // Chunk text (the H2 heading line itself is not included)
18
+ file_hash: string; // Copied from DocFile.hash; identical for every chunk of one file
19
+ }
20
+ export interface DocFile { // A markdown file loaded into memory, as returned by scanDirectory
21
+ absolute_path: string; // Scan root joined with the entry path (absolute only if the root passed in was absolute)
22
+ relative_path: string; // Path relative to the directory passed to scanDirectory
23
+ content: string; // File contents decoded as UTF-8
24
+ hash: string; // SHA-256 of the content, hex-encoded
25
+ }
26
+ /**
27
+ * Chunk a single markdown file into searchable segments
28
+ */
29
+ export declare function chunkDocument(doc: DocFile): DocChunk[];
30
+ /**
31
+ * Scan a directory for markdown files
32
+ */
33
+ export declare function scanDirectory(dirPath: string, options?: {
34
+ exclude?: string[];
35
+ }): DocFile[];
36
+ /**
37
+ * Chunk all markdown files in a directory
38
+ */
39
+ export declare function chunkDirectory(dirPath: string, options?: {
40
+ exclude?: string[];
41
+ }): {
42
+ files: DocFile[];
43
+ chunks: DocChunk[];
44
+ };
45
+ //# sourceMappingURL=doc-chunker.d.ts.map
@@ -0,0 +1,208 @@
1
+ /**
2
+ * Document Chunker — Split markdown files into searchable chunks
3
+ *
4
+ * Strategy:
5
+ * 1. Split on H2 headers first (natural semantic boundaries)
6
+ * 2. If a section exceeds target size, split on paragraph boundaries
7
+ * 3. Each chunk carries metadata: file path, title, category, chunk index
8
+ *
9
+ * Target chunk size: 500-800 tokens (~2000-3200 chars)
10
+ */
11
+ import * as fs from "fs";
12
+ import * as path from "path";
13
+ import * as crypto from "crypto";
14
+ const TARGET_CHUNK_CHARS = 2400; // ~600 tokens
15
+ const MAX_CHUNK_CHARS = 3600; // ~900 tokens hard limit
16
+ const MIN_CHUNK_CHARS = 200; // Don't create tiny chunks
17
/**
 * Extract a document title from markdown content.
 *
 * Uses the text of the first H1 heading ("# Title"). When the document has no
 * H1, falls back to the file name without its ".md" extension, with dashes
 * and underscores turned into spaces.
 *
 * @param content  Full markdown text of the document.
 * @param filePath Path of the document; only its base name is used.
 * @returns The trimmed title string.
 */
function extractTitle(content, filePath) {
    // Only spaces/tabs may separate "#" from the title. Using \s here would
    // also match newlines, so a bare "#" line would swallow the next line
    // and report it as the title.
    const h1Match = content.match(/^#[ \t]+(.+)$/m);
    if (h1Match) {
        return h1Match[1].trim();
    }
    // Fall back to filename without extension
    return path.basename(filePath, ".md").replace(/[-_]/g, " ");
}
27
/**
 * Derive a category name from a file's relative path.
 *
 * The category is the first path segment (split on the platform separator).
 * Files that sit directly in the scan root have no directory segment and are
 * reported as "root".
 */
function extractCategory(relativePath) {
    const [topSegment, ...deeper] = relativePath.split(path.sep);
    return deeper.length > 0 ? topSegment : "root";
}
36
/**
 * Split markdown into sections at H2 ("## ") headings.
 *
 * Each returned section has the heading text as `title` and the trimmed text
 * that follows it as `content`; the heading line itself is not part of the
 * content. Text before the first H2 becomes a section with an empty title.
 * Sections whose content is empty after trimming are dropped.
 *
 * Handles both LF and CRLF line endings, and ignores "## " lines that appear
 * inside fenced code blocks (``` or ~~~), since those are code, not headings.
 *
 * @param content Full markdown text.
 * @returns Array of `{ title, content }` in document order.
 */
function splitByH2(content) {
    const sections = [];
    let currentTitle = "";
    let currentLines = [];
    // Fence string that opened the current code block, or null outside one.
    let openFence = null;
    const flush = () => {
        const text = currentLines.join("\n").trim();
        if (text.length > 0) {
            sections.push({ title: currentTitle, content: text });
        }
    };
    // Splitting on \r?\n strips the carriage return; otherwise "## Title\r"
    // never matches the heading pattern because "." and "$" stop at "\r".
    for (const line of content.split(/\r?\n/)) {
        const fence = line.match(/^ {0,3}(`{3,}|~{3,})(.*)$/);
        if (fence) {
            const marker = fence[1];
            if (openFence === null) {
                openFence = marker;
            }
            else if (marker[0] === openFence[0] &&
                marker.length >= openFence.length &&
                fence[2].trim() === "") {
                // A closing fence uses the same character, is at least as
                // long as the opener, and carries no info string.
                openFence = null;
            }
        }
        const h2Match = openFence === null ? line.match(/^##\s+(.+)$/) : null;
        if (h2Match) {
            // Save previous section if it has content
            flush();
            currentTitle = h2Match[1].trim();
            currentLines = [];
        }
        else {
            currentLines.push(line);
        }
    }
    // Don't forget the last section
    flush();
    return sections;
}
70
/**
 * Split a text blob on paragraph boundaries so pieces stay near a size target.
 *
 * Text that already fits within `maxChars` is returned unchanged as a single
 * element. Otherwise paragraphs are greedily packed into pieces, re-joined with
 * a blank line; a new piece starts when adding the next paragraph would
 * exceed `maxChars`.
 *
 * A paragraph boundary is one or more blank lines. Blank lines may use LF or
 * CRLF endings and may contain only spaces/tabs.
 *
 * NOTE: a single paragraph longer than `maxChars` is kept whole, so a piece
 * can still exceed the target.
 *
 * @param text     Text to split.
 * @param maxChars Target upper bound for each piece, in characters.
 * @returns Non-empty, trimmed pieces in original order.
 */
function splitByParagraphs(text, maxChars) {
    if (text.length <= maxChars) {
        return [text];
    }
    const chunks = [];
    // A line break followed by one or more (optionally whitespace-only) empty
    // lines. Indentation of the following paragraph is not consumed.
    const paragraphs = text.split(/\r?\n(?:[ \t]*\r?\n)+/);
    let current = "";
    for (const para of paragraphs) {
        // +2 accounts for the "\n\n" joiner added between paragraphs.
        if (current.length + para.length + 2 > maxChars && current.length > 0) {
            chunks.push(current.trim());
            current = para;
        }
        else {
            current = current ? current + "\n\n" + para : para;
        }
    }
    if (current.trim().length > 0) {
        chunks.push(current.trim());
    }
    return chunks;
}
93
/**
 * Return the SHA-256 digest of the given content as a lowercase hex string.
 */
function hashContent(content) {
    const hasher = crypto.createHash("sha256");
    hasher.update(content);
    return hasher.digest("hex");
}
99
/**
 * Chunk a single markdown file into searchable segments.
 *
 * The document is split at H2 headings. A section that fits within
 * MAX_CHUNK_CHARS becomes one chunk; a larger section is split on paragraph
 * boundaries aiming at TARGET_CHUNK_CHARS. Pieces shorter than
 * MIN_CHUNK_CHARS are skipped so the index is not filled with tiny chunks.
 *
 * Fallback: if that produced no chunks at all (for example every section was
 * below the minimum) but the document as a whole is long enough, the whole
 * document is indexed with an empty section title. Documents longer than
 * MAX_CHUNK_CHARS are split rather than truncated, so no text is dropped.
 *
 * @param doc File to chunk (`relative_path`, `content` and `hash` are read).
 * @returns Chunks in document order with consecutive `chunk_index` from 0.
 */
export function chunkDocument(doc) {
    const title = extractTitle(doc.content, doc.relative_path);
    const category = extractCategory(doc.relative_path);
    const chunks = [];
    // chunk_index is the position in `chunks`, so indices are always 0..n-1.
    const pushChunk = (sectionTitle, content) => {
        chunks.push({
            file_path: doc.relative_path,
            chunk_index: chunks.length,
            title,
            section_title: sectionTitle,
            category,
            content,
            file_hash: doc.hash,
        });
    };
    // Split by H2 headers
    for (const section of splitByH2(doc.content)) {
        // A section that fits is used directly; otherwise split by paragraphs
        const pieces = section.content.length <= MAX_CHUNK_CHARS
            ? [section.content]
            : splitByParagraphs(section.content, TARGET_CHUNK_CHARS);
        for (const piece of pieces) {
            if (piece.length >= MIN_CHUNK_CHARS) {
                pushChunk(section.title, piece);
            }
        }
    }
    // Edge case: nothing qualified above, but the document as a whole does
    if (chunks.length === 0) {
        const whole = doc.content.trim();
        if (whole.length >= MIN_CHUNK_CHARS) {
            const pieces = whole.length <= MAX_CHUNK_CHARS
                ? [whole]
                : splitByParagraphs(whole, TARGET_CHUNK_CHARS);
            for (const piece of pieces) {
                // Paragraph splitting cannot break a single huge paragraph,
                // so enforce the hard limit by slicing; every slice is kept.
                for (let start = 0; start < piece.length; start += MAX_CHUNK_CHARS) {
                    pushChunk("", piece.slice(start, start + MAX_CHUNK_CHARS));
                }
            }
        }
    }
    return chunks;
}
156
/**
 * Recursively collect markdown (".md") files under a directory.
 *
 * Directories whose name appears in `options.exclude` are not descended into
 * (default: "_archive", "node_modules", ".git"). Directories that cannot be
 * listed and files that cannot be read are skipped silently.
 *
 * @param dirPath Root directory to scan.
 * @param options Optional `exclude` list of directory names.
 * @returns One entry per readable markdown file, with its content and hash.
 */
export function scanDirectory(dirPath, options = {}) {
    const skippedDirs = options.exclude || ["_archive", "node_modules", ".git"];
    const found = [];
    const visit = (dir) => {
        let listing;
        try {
            listing = fs.readdirSync(dir, { withFileTypes: true });
        }
        catch {
            // Permission denied or inaccessible
            return;
        }
        for (const item of listing) {
            const absolute = path.join(dir, item.name);
            if (item.isDirectory()) {
                if (!skippedDirs.includes(item.name)) {
                    visit(absolute);
                }
                continue;
            }
            if (!item.isFile() || !item.name.endsWith(".md")) {
                continue;
            }
            try {
                const text = fs.readFileSync(absolute, "utf-8");
                const relative = path.relative(dirPath, absolute);
                found.push({
                    absolute_path: absolute,
                    relative_path: relative,
                    content: text,
                    hash: hashContent(text),
                });
            }
            catch {
                // Skip unreadable files
            }
        }
    };
    visit(dirPath);
    return found;
}
197
/**
 * Scan a directory for markdown files and chunk every file found.
 *
 * @param dirPath Root directory to scan.
 * @param options Passed through to scanDirectory.
 * @returns The scanned files together with all chunks, in scan order.
 */
export function chunkDirectory(dirPath, options = {}) {
    const files = scanDirectory(dirPath, options);
    const chunks = files.flatMap((file) => chunkDocument(file));
    return { files, chunks };
}
208
+ //# sourceMappingURL=doc-chunker.js.map
@@ -0,0 +1,88 @@
1
+ /**
2
+ * Document Index — Storage and search for indexed doc chunks
3
+ *
4
+ * Supports two backends:
5
+ * - Free tier: Local JSON file with BM25 keyword search
6
+ * - Pro/dev tier: In-memory vector index with embeddings
7
+ *
8
+ * Follows the same patterns as local-vector-search.ts and local-file-storage.ts
9
+ */
10
+ import type { DocChunk } from "./doc-chunker.js";
11
+ export interface IndexedDocChunk { // A DocChunk as stored in the index, plus index bookkeeping fields
12
+ id: string; // Identifier assigned when the chunk is indexed
13
+ file_path: string; // Copied from DocChunk.file_path
14
+ chunk_index: number; // Copied from DocChunk.chunk_index
15
+ title: string; // Copied from DocChunk.title
16
+ section_title: string; // Copied from DocChunk.section_title
17
+ category: string; // Copied from DocChunk.category
18
+ content: string; // Copied from DocChunk.content
19
+ file_hash: string; // Copied from DocChunk.file_hash; compared by getChangedFiles
20
+ project: string; // Project name the chunk was indexed under
21
+ embedding?: number[]; // Embedding vector when one was generated; omitted from the on-disk JSON file
22
+ indexed_at: string; // ISO-8601 timestamp taken when the indexing call started
23
+ }
24
+ export interface DocSearchResult { // One hit returned by searchDocs
25
+ id: string; // Id of the matching indexed chunk
26
+ file_path: string; // File path of the matching chunk
27
+ chunk_index: number; // Position of the matching chunk within its file
28
+ title: string; // Document title of the matching chunk
29
+ section_title: string; // Section heading of the matching chunk
30
+ category: string; // Category of the matching chunk
31
+ content: string; // Full text of the matching chunk
32
+ similarity: number; // Relevance score: vector similarity rounded to 3 decimals, or the score reported by BM25 search
33
+ project: string; // Project the matching chunk belongs to
34
+ }
35
+ export interface IndexStats { // Summary of the doc index returned by getIndexStats
36
+ total_chunks: number; // Number of chunks counted (after the optional project filter)
37
+ total_files: number; // Number of distinct file paths among those chunks
38
+ files_indexed: string[]; // Distinct file paths, sorted
39
+ categories: Record<string, number>; // Chunk count per category
40
+ project: string; // Project the stats were computed for, or "all" when none was given
41
+ has_embeddings: boolean; // Whether the in-memory vector index currently holds embedded chunks
42
+ }
43
+ /**
44
+ * Index doc chunks into storage.
45
+ *
46
+ * - Removes old chunks for the same project + file_path
47
+ * - Generates embeddings if available (pro/dev tier)
48
+ * - Stores to local JSON file
49
+ * - Loads into in-memory vector index if embeddings present
50
+ *
51
+ * Returns count of chunks indexed.
52
+ */
53
+ export declare function indexChunks(chunks: DocChunk[], project: string, options?: {
54
+ batchSize?: number;
55
+ }): Promise<{
56
+ indexed: number;
57
+ embedded: number;
58
+ errors: number;
59
+ }>;
60
+ /**
61
+ * Search indexed docs using semantic similarity (pro/dev) or BM25 (free)
62
+ */
63
+ export declare function searchDocs(query: string, options?: {
64
+ project?: string;
65
+ category?: string;
66
+ match_count?: number;
67
+ }): Promise<DocSearchResult[]>;
68
+ /**
69
+ * Get index statistics
70
+ */
71
+ export declare function getIndexStats(project?: string): IndexStats;
72
+ /**
73
+ * Check which files have changed since last index (by hash)
74
+ */
75
+ export declare function getChangedFiles(fileHashes: Map<string, string>, project: string): {
76
+ changed: string[];
77
+ unchanged: string[];
78
+ new_files: string[];
79
+ };
80
+ /**
81
+ * Initialize vector index from local storage on startup
82
+ */
83
+ export declare function initDocVectorIndex(): void;
84
+ /**
85
+ * Clear the doc index for a project (or all)
86
+ */
87
+ export declare function clearDocIndex(project?: string): number;
88
+ //# sourceMappingURL=doc-index.d.ts.map
@@ -0,0 +1,328 @@
1
+ /**
2
+ * Document Index — Storage and search for indexed doc chunks
3
+ *
4
+ * Supports two backends:
5
+ * - Free tier: Local JSON file with BM25 keyword search
6
+ * - Pro/dev tier: In-memory vector index with embeddings
7
+ *
8
+ * Follows the same patterns as local-vector-search.ts and local-file-storage.ts
9
+ */
10
+ import * as fs from "fs";
11
+ import * as path from "path";
12
+ import { v4 as uuidv4 } from "uuid";
13
+ import { getGitmemDir } from "./gitmem-dir.js";
14
+ import { bm25Search } from "./bm25.js";
15
+ import { embed as generateEmbedding, isEmbeddingAvailable } from "./embedding.js";
16
+ // --- Local File Index ---
17
+ const INDEX_FILE = "docs-index.json";
18
+ const MAX_INDEX_SIZE = 20 * 1024 * 1024; // 20MB
19
/**
 * Resolve the location of the docs index file inside the gitmem directory.
 */
function getIndexPath() {
    const baseDir = getGitmemDir();
    return path.join(baseDir, INDEX_FILE);
}
25
/**
 * Read the local index from disk.
 *
 * Returns an empty array when the index file does not exist. When the file is
 * unreadable, is not valid JSON, or does not contain an array at the top
 * level, the problem is logged to stderr and an empty array is returned, so
 * callers can always treat the result as a list of chunk objects.
 *
 * @returns The stored chunk records (entries that are not objects are dropped).
 */
function readLocalIndex() {
    const indexPath = getIndexPath();
    if (!fs.existsSync(indexPath)) {
        return [];
    }
    try {
        const raw = fs.readFileSync(indexPath, "utf-8");
        const parsed = JSON.parse(raw);
        // Valid JSON is not necessarily a valid index: "{}" or "null" would
        // otherwise reach callers that immediately call .filter()/.map().
        if (!Array.isArray(parsed)) {
            throw new TypeError("index root is not an array");
        }
        // Callers dereference every entry (c.project, c.file_path, ...).
        return parsed.filter((entry) => entry !== null && typeof entry === "object");
    }
    catch {
        console.error("[doc-index] Failed to read docs-index.json, starting fresh");
        return [];
    }
}
41
/**
 * Write the local index to disk.
 *
 * Embeddings are stripped before serialising to keep the file small. A
 * warning is logged when the serialised index exceeds MAX_INDEX_SIZE, but
 * the file is still written.
 *
 * The JSON is written to a temporary sibling file and then renamed over the
 * index, so an interrupted write cannot leave a truncated index behind
 * (which the reader would discard as unreadable).
 *
 * @param chunks Chunk records to persist; the array is not modified.
 * @throws Any filesystem error from creating the directory or writing the file.
 */
function writeLocalIndex(chunks) {
    const indexPath = getIndexPath();
    // recursive: true makes this a no-op when the directory already exists.
    fs.mkdirSync(path.dirname(indexPath), { recursive: true });
    // Strip embeddings from local file to save space
    const stripped = chunks.map(({ embedding: _e, ...rest }) => rest);
    const json = JSON.stringify(stripped, null, 2);
    if (Buffer.byteLength(json, "utf-8") > MAX_INDEX_SIZE) {
        console.error("[doc-index] Warning: docs-index.json exceeds 20MB");
    }
    // The pid keeps concurrent processes from sharing one temp file.
    const tempPath = `${indexPath}.${process.pid}.tmp`;
    try {
        fs.writeFileSync(tempPath, json, "utf-8");
        fs.renameSync(tempPath, indexPath);
    }
    catch (err) {
        try {
            fs.unlinkSync(tempPath);
        }
        catch {
            // Temp file was never created or is already gone
        }
        throw err;
    }
}
58
+ let vectorIndex = [];
59
/**
 * Compute the cosine similarity between two vectors.
 *
 * The dot product is divided by the product of the vector magnitudes, so the
 * result is correct whether or not the inputs are unit-length. For unit-length
 * inputs this equals the plain dot product.
 *
 * @param a First vector.
 * @param b Second vector.
 * @returns A value in [-1, 1]; 0 when the lengths differ, when the vectors
 *          are empty, or when either vector has zero magnitude.
 */
function cosineSimilarity(a, b) {
    if (a.length !== b.length || a.length === 0) {
        return 0;
    }
    let dot = 0;
    let sumSquaresA = 0;
    let sumSquaresB = 0;
    for (let i = 0; i < a.length; i++) {
        dot += a[i] * b[i];
        sumSquaresA += a[i] * a[i];
        sumSquaresB += b[i] * b[i];
    }
    const magnitude = Math.sqrt(sumSquaresA) * Math.sqrt(sumSquaresB);
    // A zero vector has no direction; report "no similarity" instead of NaN.
    return magnitude > 0 ? dot / magnitude : 0;
}
71
+ // --- Public API ---
72
/**
 * Index doc chunks into storage.
 *
 * - Removes old chunks for the same project + file_path
 * - Generates embeddings if available (one request per chunk, sequentially)
 * - Stores to local JSON file (embeddings are not persisted there)
 * - Rebuilds the in-memory vector index from the merged set
 *
 * Chunks that stay in the index come from the on-disk file, which carries no
 * embeddings. Their vectors are therefore re-attached from the current
 * in-memory index before it is rebuilt; otherwise every call would drop the
 * embeddings of all chunks indexed earlier in the same process.
 *
 * @param chunks  Chunks to add (replacing earlier chunks of the same files).
 * @param project Project name stored on every chunk.
 * @param options `batchSize` controls how often progress is logged
 *                (default 10; non-positive or non-numeric values use the default).
 * @returns Counts: chunks written, chunks embedded, embedding failures.
 */
export async function indexChunks(chunks, project, options = {}) {
    // A zero, negative or NaN step would make the loop below never finish.
    const requestedBatch = Math.floor(Number(options.batchSize));
    const batchSize = requestedBatch > 0 ? requestedBatch : 10;
    const now = new Date().toISOString();
    let embedded = 0;
    let errors = 0;
    // Build set of file paths being re-indexed
    const reindexedPaths = new Set(chunks.map((c) => `${project}:${c.file_path}`));
    // Embeddings currently held in memory, keyed by chunk id
    const liveEmbeddings = new Map(vectorIndex.map((entry) => [entry.chunk.id, entry.embedding]));
    // Keep chunks of untouched files and give them back their vectors
    const kept = readLocalIndex()
        .filter((c) => !reindexedPaths.has(`${c.project}:${c.file_path}`))
        .map((c) => {
        const cached = c.embedding ? undefined : liveEmbeddings.get(c.id);
        return cached ? { ...c, embedding: cached } : c;
    });
    // Create new indexed chunks
    const newChunks = [];
    for (let i = 0; i < chunks.length; i += batchSize) {
        const batch = chunks.slice(i, i + batchSize);
        for (const chunk of batch) {
            const indexed = {
                id: uuidv4(),
                file_path: chunk.file_path,
                chunk_index: chunk.chunk_index,
                title: chunk.title,
                section_title: chunk.section_title,
                category: chunk.category,
                content: chunk.content,
                file_hash: chunk.file_hash,
                project,
                indexed_at: now,
            };
            // Generate embedding if available
            if (isEmbeddingAvailable()) {
                try {
                    // Embed title + section + content for richer representation
                    const textToEmbed = [
                        indexed.title,
                        indexed.section_title,
                        indexed.content,
                    ]
                        .filter(Boolean)
                        .join(" | ");
                    const embedding = await generateEmbedding(textToEmbed);
                    if (embedding) {
                        indexed.embedding = embedding;
                        embedded++;
                    }
                }
                catch (err) {
                    // A failed embedding does not abort indexing; the chunk is
                    // still stored and remains reachable through keyword search.
                    console.error(`[doc-index] Embedding failed for ${chunk.file_path}:${chunk.chunk_index}:`, err instanceof Error ? err.message : err);
                    errors++;
                }
            }
            newChunks.push(indexed);
        }
        // Progress logging for large batches (every ~100 chunks)
        if (chunks.length > 50 && i + batchSize < chunks.length && (i + batchSize) % 100 < batchSize) {
            console.error(`[doc-index] Progress: ${Math.min(i + batchSize, chunks.length)}/${chunks.length} chunks`);
        }
    }
    // Merge and write
    const merged = [...kept, ...newChunks];
    writeLocalIndex(merged);
    // Update in-memory vector index
    rebuildVectorIndex(merged);
    return { indexed: newChunks.length, embedded, errors };
}
147
/**
 * Replace the in-memory vector index with the chunks that carry an embedding.
 *
 * Chunks without a non-empty `embedding` array are left out. The resulting
 * entry count is logged to stderr.
 */
function rebuildVectorIndex(chunks) {
    const entries = [];
    for (const candidate of chunks) {
        const vector = candidate.embedding;
        if (vector && Array.isArray(vector) && vector.length > 0) {
            entries.push({ chunk: candidate, embedding: vector });
        }
    }
    vectorIndex = entries;
    console.error(`[doc-index] Vector index rebuilt: ${vectorIndex.length} entries with embeddings`);
}
159
/**
 * Search indexed docs.
 *
 * Uses vector similarity search when embeddings are available and the
 * in-memory vector index has entries; otherwise falls back to BM25 keyword
 * search over the on-disk index.
 *
 * @param query   Search text.
 * @param options Optional `project` / `category` filters and `match_count`
 *                (handled by the selected search backend).
 * @returns Matching chunks, best match first.
 */
export async function searchDocs(query, options = {}) {
    // Try vector search first (pro/dev tier with embeddings)
    const canUseVectors = isEmbeddingAvailable() && vectorIndex.length > 0;
    if (canUseVectors) {
        return vectorSearchDocs(query, options);
    }
    // Fall back to BM25 keyword search
    return bm25SearchDocs(query, options);
}
171
/**
 * Vector-based semantic search over doc chunks.
 *
 * Candidates are the in-memory vector entries that match the optional
 * `project` and `category` filters. They are scored against the query
 * embedding and the best `match_count` (default 5) are returned with the
 * similarity rounded to 3 decimals.
 *
 * Falls back to BM25 keyword search when:
 * - no embedded chunk matches the filters (chunks indexed without embeddings
 *   would otherwise be unreachable), or
 * - the query embedding cannot be generated (empty result or thrown error).
 *
 * @param query   Search text.
 * @param options `project`, `category`, `match_count`.
 * @returns Matching chunks, best match first.
 */
async function vectorSearchDocs(query, options) {
    const matchCount = options.match_count || 5;
    // Filter candidates
    const candidates = vectorIndex.filter((entry) => (!options.project || entry.chunk.project === options.project) &&
        (!options.category || entry.chunk.category === options.category));
    if (candidates.length === 0) {
        // Nothing embedded in this scope; keyword search may still match.
        return bm25SearchDocs(query, options);
    }
    // Generate query embedding
    let queryEmbedding = null;
    try {
        queryEmbedding = await generateEmbedding(query);
    }
    catch (err) {
        console.error("[doc-index] Query embedding threw:", err instanceof Error ? err.message : err);
    }
    if (!queryEmbedding) {
        console.error("[doc-index] Query embedding failed, falling back to BM25");
        return bm25SearchDocs(query, options);
    }
    // Score by cosine similarity
    const scored = candidates.map((entry) => ({
        chunk: entry.chunk,
        similarity: cosineSimilarity(queryEmbedding, entry.embedding),
    }));
    // Sort and take top k
    scored.sort((a, b) => b.similarity - a.similarity);
    return scored.slice(0, matchCount).map(({ chunk, similarity }) => ({
        id: chunk.id,
        file_path: chunk.file_path,
        chunk_index: chunk.chunk_index,
        title: chunk.title,
        section_title: chunk.section_title,
        category: chunk.category,
        content: chunk.content,
        similarity: Math.round(similarity * 1000) / 1000,
        project: chunk.project,
    }));
}
210
/**
 * BM25 keyword search over the on-disk doc index.
 *
 * Chunks are filtered by the optional `project` and `category`, then ranked
 * by bm25Search with per-field boosts (title 3, section title 2, category
 * 1.5, content 1). At most `match_count` (default 5) results are requested.
 */
function bm25SearchDocs(query, options) {
    const limit = options.match_count || 5;
    const { project, category } = options;
    // Filter by project and category
    const pool = readLocalIndex().filter((entry) => (!project || entry.project === project) &&
        (!category || entry.category === category));
    if (pool.length === 0) {
        return [];
    }
    // Build the BM25 documents and an id lookup in one pass
    const lookup = new Map();
    const documents = [];
    for (const entry of pool) {
        lookup.set(entry.id, entry);
        documents.push({
            id: entry.id,
            fields: [
                { text: entry.title, boost: 3 },
                { text: entry.section_title || "", boost: 2 },
                { text: entry.category, boost: 1.5 },
                { text: entry.content, boost: 1 },
            ],
        });
    }
    const hits = bm25Search(query, documents, limit);
    // Map hits back to full chunk records, skipping unknown ids
    const output = [];
    for (const hit of hits) {
        const entry = lookup.get(hit.id);
        if (!entry) {
            continue;
        }
        output.push({
            id: entry.id,
            file_path: entry.file_path,
            chunk_index: entry.chunk_index,
            title: entry.title,
            section_title: entry.section_title,
            category: entry.category,
            content: entry.content,
            similarity: hit.similarity,
            project: entry.project,
        });
    }
    return output;
}
258
/**
 * Get index statistics.
 *
 * Counts chunks, distinct files and chunks per category in the on-disk index,
 * optionally restricted to one project. `has_embeddings` reports whether the
 * in-memory vector index holds at least one entry within the same scope.
 *
 * @param project Optional project name; when omitted, stats cover all projects.
 * @returns Summary of the index for the requested scope.
 */
export function getIndexStats(project) {
    const chunks = readLocalIndex();
    const scoped = project
        ? chunks.filter((c) => c.project === project)
        : chunks;
    const files = new Set();
    // Counting in a Map avoids clashes with inherited object properties when a
    // category is named e.g. "constructor" or "toString".
    const counts = new Map();
    for (const c of scoped) {
        files.add(c.file_path);
        counts.set(c.category, (counts.get(c.category) || 0) + 1);
    }
    return {
        total_chunks: scoped.length,
        total_files: files.size,
        files_indexed: Array.from(files).sort(),
        categories: Object.fromEntries(counts),
        project: project || "all",
        // Apply the same project scope as the counts above.
        has_embeddings: vectorIndex.some((entry) => !project || entry.chunk.project === project),
    };
}
280
/**
 * Classify files against the stored index by comparing content hashes.
 *
 * For each `[filePath, hash]` pair: a path with no stored hash for the
 * project is "new", a path whose stored hash differs is "changed", and a
 * path whose stored hash matches is "unchanged".
 */
export function getChangedFiles(fileHashes, project) {
    const storedHashes = new Map(readLocalIndex()
        .filter((c) => c.project === project)
        .map((c) => [c.file_path, c.file_hash]));
    const result = { changed: [], unchanged: [], new_files: [] };
    for (const [filePath, hash] of fileHashes) {
        const previous = storedHashes.get(filePath);
        let bucket;
        if (!previous) {
            bucket = result.new_files;
        }
        else if (previous !== hash) {
            bucket = result.changed;
        }
        else {
            bucket = result.unchanged;
        }
        bucket.push(filePath);
    }
    return result;
}
306
/**
 * Load the on-disk index and rebuild the in-memory vector index from it.
 * Intended to be called once at startup.
 */
export function initDocVectorIndex() {
    rebuildVectorIndex(readLocalIndex());
}
313
/**
 * Clear the doc index for a project (or all).
 *
 * Without a project, the on-disk index and the in-memory vector index are
 * both emptied. With a project, only that project's chunks are removed from
 * both; embeddings held in memory for other projects are preserved (the
 * on-disk copy has no embeddings, so rebuilding from it would discard them).
 *
 * @param project Optional project name; when omitted, everything is cleared.
 * @returns Number of chunks removed from the on-disk index.
 */
export function clearDocIndex(project) {
    const existing = readLocalIndex();
    if (!project) {
        writeLocalIndex([]);
        vectorIndex = [];
        return existing.length;
    }
    const kept = existing.filter((c) => c.project !== project);
    writeLocalIndex(kept);
    // Prune in place instead of rebuilding from the embedding-less disk copy.
    vectorIndex = vectorIndex.filter((entry) => entry.chunk.project !== project);
    return existing.length - kept.length;
}
328
+ //# sourceMappingURL=doc-index.js.map