gitmem-mcp 1.4.4 → 1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +18 -0
- package/dist/hooks/format-utils.js +4 -0
- package/dist/server.js +13 -0
- package/dist/services/doc-chunker.d.ts +45 -0
- package/dist/services/doc-chunker.js +208 -0
- package/dist/services/doc-index.d.ts +88 -0
- package/dist/services/doc-index.js +328 -0
- package/dist/tools/definitions.d.ts +688 -0
- package/dist/tools/definitions.js +87 -0
- package/dist/tools/index-docs.d.ts +30 -0
- package/dist/tools/index-docs.js +163 -0
- package/dist/tools/prepare-context.js +7 -0
- package/dist/tools/recall.js +10 -1
- package/dist/tools/search-docs.d.ts +38 -0
- package/dist/tools/search-docs.js +94 -0
- package/dist/tools/search.js +11 -1
- package/dist/tools/session-close.js +45 -2
- package/dist/tools/session-start.js +14 -0
- package/package.json +1 -1
package/CHANGELOG.md
CHANGED
|
@@ -7,6 +7,24 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
|
|
|
7
7
|
|
|
8
8
|
## [Unreleased]
|
|
9
9
|
|
|
10
|
+
## [1.5.1] - 2026-05-13
|
|
11
|
+
|
|
12
|
+
### Fixed
|
|
13
|
+
- **CI smoke test tool count**: Updated `EXPECTED_TOOL_COUNTS` to reflect `index_docs` and `search_docs` additions (+2 per tier). The 1.5.0 release failed to publish because the smoke test expected 23 free-tier tools but found 25.
|
|
14
|
+
|
|
15
|
+
## [1.5.0] - 2026-05-11
|
|
16
|
+
|
|
17
|
+
### Added
|
|
18
|
+
- **`index_docs` tool**: Scan a directory of markdown files, chunk them, and store the chunks in a local doc index for semantic search. Supports incremental indexing (only changed files are re-processed), forced re-indexing, and project-scoped indexes. Alias: `gitmem-idx`.
|
|
19
|
+
- **`search_docs` tool**: Search indexed repository documentation using semantic similarity (pro tier) or BM25 keyword search (free tier). Returns relevant chunks with file paths for targeted reading. Alias: `gitmem-sd`.
|
|
20
|
+
- **Citation protocol**: `recall`, `search`, and `prepare_context` now include a citation rule instructing agents to cite record IDs when referencing facts from institutional memory.
|
|
21
|
+
- **Low confidence tagging**: Recall and search results with similarity below 0.55 are tagged `[low confidence]` — historically, about 66% of matches below this threshold were rated N/A.
|
|
22
|
+
- **Session duration on resume**: `session_start` now shows elapsed session time and loaded scar count when resuming or refreshing an existing session.
|
|
23
|
+
|
|
24
|
+
### Changed
|
|
25
|
+
- **Quick close hard gate**: `session_close` with `close_type: "quick"` now rejects sessions over 30 minutes, requiring standard close instead.
|
|
26
|
+
- **Standard close recall gate**: `session_close` with `close_type: "standard"` now requires at least one `recall()` call during the session (exemptions: quick close, autonomous agents, sessions with inline reflection).
|
|
27
|
+
|
|
10
28
|
## [1.4.4] - 2026-03-31
|
|
11
29
|
|
|
12
30
|
### Fixed
|
|
@@ -57,6 +57,10 @@ export function formatCompact(scars, plan, maxTokens) {
|
|
|
57
57
|
lines.push(line);
|
|
58
58
|
included++;
|
|
59
59
|
}
|
|
60
|
+
// Citation reminder for sub-agent context (compact — one line)
|
|
61
|
+
if (included > 0) {
|
|
62
|
+
lines.push("Cite record IDs for any factual claims from these scars.");
|
|
63
|
+
}
|
|
60
64
|
return { payload: lines.join("\n"), included };
|
|
61
65
|
}
|
|
62
66
|
/**
|
package/dist/server.js
CHANGED
|
@@ -36,6 +36,8 @@ import { dismissSuggestion } from "./tools/dismiss-suggestion.js";
|
|
|
36
36
|
import { cleanupThreads } from "./tools/cleanup-threads.js";
|
|
37
37
|
import { archiveLearning } from "./tools/archive-learning.js";
|
|
38
38
|
import { contributeFeedback } from "./tools/contribute-feedback.js";
|
|
39
|
+
import { indexDocs } from "./tools/index-docs.js";
|
|
40
|
+
import { searchDocsHandler } from "./tools/search-docs.js";
|
|
39
41
|
import { getCacheStatus, checkCacheHealth, flushCache, startBackgroundInit, } from "./services/startup.js";
|
|
40
42
|
import { getEffectTracker } from "./services/effect-tracker.js";
|
|
41
43
|
import { RIPPLE, ANSI } from "./services/display-protocol.js";
|
|
@@ -246,6 +248,8 @@ export function createServer() {
|
|
|
246
248
|
{ alias: "gitmem-al", full: "archive_learning", description: "Archive a scar/win/pattern (is_active=false)" },
|
|
247
249
|
{ alias: "gitmem-graph", full: "graph_traverse", description: "Traverse knowledge graph over institutional memory" },
|
|
248
250
|
{ alias: "gitmem-fb", full: "contribute_feedback", description: "Submit feedback about gitmem (10/session limit)" },
|
|
251
|
+
{ alias: "gitmem-idx", full: "index_docs", description: "Index markdown docs for semantic search" },
|
|
252
|
+
{ alias: "gitmem-sd", full: "search_docs", description: "Search indexed repository docs" },
|
|
249
253
|
];
|
|
250
254
|
if (hasBatchOperations()) {
|
|
251
255
|
commands.push({ alias: "gitmem-rsb", full: "record_scar_usage_batch", description: "Track multiple scars (batch)" });
|
|
@@ -315,6 +319,15 @@ export function createServer() {
|
|
|
315
319
|
case "gm-cache-f":
|
|
316
320
|
result = await flushCache(toolArgs.project || getProject() || "default");
|
|
317
321
|
break;
|
|
322
|
+
// Doc indexing and search
|
|
323
|
+
case "index_docs":
|
|
324
|
+
case "gitmem-idx":
|
|
325
|
+
result = await indexDocs(toolArgs);
|
|
326
|
+
break;
|
|
327
|
+
case "search_docs":
|
|
328
|
+
case "gitmem-sd":
|
|
329
|
+
result = await searchDocsHandler(toolArgs);
|
|
330
|
+
break;
|
|
318
331
|
default:
|
|
319
332
|
throw new Error(`Unknown tool: ${name}`);
|
|
320
333
|
}
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Document Chunker — Split markdown files into searchable chunks
|
|
3
|
+
*
|
|
4
|
+
* Strategy:
|
|
5
|
+
* 1. Split on H2 headers first (natural semantic boundaries)
|
|
6
|
+
* 2. If a section exceeds target size, split on paragraph boundaries
|
|
7
|
+
* 3. Each chunk carries metadata: file path, title, category, chunk index
|
|
8
|
+
*
|
|
9
|
+
* Target chunk size: ~600 tokens (~2400 chars); sections up to ~900 tokens (~3600 chars) are kept as a single chunk
|
|
10
|
+
*/
|
|
11
|
+
/**
 * One searchable slice of a markdown document, together with the metadata
 * needed to trace it back to its source file.
 */
export interface DocChunk {
    /** Path of the source file, relative to the directory that was scanned. */
    file_path: string;
    /** Zero-based position of this chunk within its source file. */
    chunk_index: number;
    /** Document title: the first H1 heading, or a name derived from the file name. */
    title: string;
    /** Text of the H2 heading this chunk belongs to; empty when there is none. */
    section_title: string;
    /** First directory segment of `file_path`, or "root" for top-level files. */
    category: string;
    /** The chunk's markdown text. */
    content: string;
    /** Hash of the whole source file (not of this chunk), used for change detection. */
    file_hash: string;
}
|
|
20
|
+
/**
 * A markdown file that has been read from disk and is ready to be chunked.
 */
export interface DocFile {
    /**
     * Full path to the file as built from the scan root (absolute whenever the
     * root passed to `scanDirectory` is absolute).
     */
    absolute_path: string;
    /** Path of the file relative to the scan root. */
    relative_path: string;
    /** Entire file content, decoded as UTF-8. */
    content: string;
    /** Hex digest of `content`; compared across runs to detect changed files. */
    hash: string;
}
|
|
26
|
+
/**
 * Split one markdown document into searchable chunks.
 *
 * @param doc File record (paths, content and hash) to chunk.
 * @returns The document's chunks in reading order, each carrying the file's
 *          path, title, category and hash. May be empty for very short files.
 */
export declare function chunkDocument(doc: DocFile): DocChunk[];
|
|
30
|
+
/**
 * Recursively collect the markdown (`.md`) files under a directory.
 *
 * @param dirPath Directory to scan.
 * @param options Optional settings. `exclude` lists directory names that are
 *                not descended into; a default list is used when omitted.
 * @returns One record per readable markdown file found.
 */
export declare function scanDirectory(dirPath: string, options?: { exclude?: string[] }): DocFile[];
|
|
36
|
+
/**
 * Scan a directory for markdown files and chunk every file found.
 *
 * @param dirPath Directory to scan.
 * @param options Optional settings, forwarded to the directory scan
 *                (`exclude`: directory names to skip).
 * @returns `files` — the scanned file records; `chunks` — the chunks of all
 *          files, concatenated in file order.
 */
export declare function chunkDirectory(dirPath: string, options?: { exclude?: string[] }): { files: DocFile[]; chunks: DocChunk[] };
|
|
45
|
+
//# sourceMappingURL=doc-chunker.d.ts.map
|
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Document Chunker — Split markdown files into searchable chunks
|
|
3
|
+
*
|
|
4
|
+
* Strategy:
|
|
5
|
+
* 1. Split on H2 headers first (natural semantic boundaries)
|
|
6
|
+
* 2. If a section exceeds target size, split on paragraph boundaries
|
|
7
|
+
* 3. Each chunk carries metadata: file path, title, category, chunk index
|
|
8
|
+
*
|
|
9
|
+
* Target chunk size: ~600 tokens (~2400 chars); sections up to ~900 tokens (~3600 chars) are kept as a single chunk
|
|
10
|
+
*/
|
|
11
|
+
import * as fs from "fs";
|
|
12
|
+
import * as path from "path";
|
|
13
|
+
import * as crypto from "crypto";
|
|
14
|
+
const TARGET_CHUNK_CHARS = 2400; // ~600 tokens
|
|
15
|
+
const MAX_CHUNK_CHARS = 3600; // ~900 tokens hard limit
|
|
16
|
+
const MIN_CHUNK_CHARS = 200; // Don't create tiny chunks
|
|
17
|
+
/**
 * Derive a human-readable title for a markdown document.
 *
 * The title is the text of the first non-empty H1 heading (`# Title`) that is
 * NOT inside a fenced code block, so that shell comments such as
 * `# install deps` inside a code sample are never mistaken for the title.
 * When no such heading exists, the title is built from the file name.
 *
 * @param content  Raw markdown text (LF or CRLF line endings).
 * @param filePath Path of the document; only its base name is used.
 * @returns The trimmed H1 text, or the base name without `.md` with `-` and
 *          `_` replaced by spaces.
 */
function extractTitle(content, filePath) {
    // Marker character (` or ~) of the fence we are currently inside, if any.
    let openFence = null;
    for (const line of content.split(/\r?\n/)) {
        const fence = line.match(/^\s*(`{3,}|~{3,})/);
        if (fence) {
            const marker = fence[1][0];
            if (openFence === null) {
                openFence = marker;
            }
            else if (openFence === marker) {
                openFence = null;
            }
            continue;
        }
        if (openFence !== null)
            continue;
        const heading = line.match(/^#\s+(.+)$/);
        if (heading) {
            const title = heading[1].trim();
            // A heading made only of whitespace is not a usable title.
            if (title.length > 0)
                return title;
        }
    }
    // Fall back to filename without extension
    return path.basename(filePath, ".md").replace(/[-_]/g, " ");
}
|
|
27
|
+
/**
 * Derive a category name from a file's location.
 *
 * @param relativePath Path relative to the scanned root, using the platform's
 *                     path separator.
 * @returns The first path segment when the file lives in a sub-directory,
 *          otherwise the literal "root".
 */
function extractCategory(relativePath) {
    const [firstSegment, ...remaining] = relativePath.split(path.sep);
    // Any remaining segment means the first one is a directory name.
    return remaining.length > 0 ? firstSegment : "root";
}
|
|
36
|
+
/**
 * Split markdown into sections at H2 headings (`## Title`).
 *
 * - Text before the first H2 becomes a section with an empty title.
 * - The heading line itself is not part of the section content; its text is
 *   kept as the section title.
 * - Sections whose content is empty after trimming are dropped.
 * - Both LF and CRLF line endings are accepted (content is re-joined with LF).
 *   Previously a trailing `\r` prevented every heading from matching, so CRLF
 *   files were never split.
 * - `## ` lines inside fenced code blocks are treated as content, not headings.
 *
 * @param content Raw markdown text.
 * @returns Sections in document order as `{ title, content }` objects.
 */
function splitByH2(content) {
    const sections = [];
    let currentTitle = "";
    let currentLines = [];
    // Marker character (` or ~) of the fence we are currently inside, if any.
    let openFence = null;
    // Store the section collected so far (if it has any text) and start afresh.
    const flush = () => {
        const text = currentLines.join("\n").trim();
        if (text.length > 0) {
            sections.push({ title: currentTitle, content: text });
        }
        currentLines = [];
    };
    for (const line of content.split(/\r?\n/)) {
        const fence = line.match(/^\s*(`{3,}|~{3,})/);
        if (fence) {
            const marker = fence[1][0];
            if (openFence === null) {
                openFence = marker;
            }
            else if (openFence === marker) {
                openFence = null;
            }
        }
        const h2Match = openFence === null ? line.match(/^##\s+(.+)$/) : null;
        if (h2Match) {
            flush();
            currentTitle = h2Match[1].trim();
        }
        else {
            currentLines.push(line);
        }
    }
    // Don't forget the last section
    flush();
    return sections;
}
|
|
70
|
+
/**
 * Break a single paragraph that is longer than `maxChars` into pieces of at
 * most `maxChars` characters, cutting at line breaks where possible and
 * slicing inside a line only when one line alone exceeds the limit.
 */
function hardSplitParagraph(para, maxChars) {
    if (para.length <= maxChars)
        return [para];
    const pieces = [];
    let buffer = "";
    for (const line of para.split("\n")) {
        let rest = line;
        if (buffer.length > 0 && buffer.length + 1 + rest.length > maxChars) {
            pieces.push(buffer);
            buffer = "";
        }
        // Only reachable with an empty buffer: the line alone is too long.
        while (rest.length > maxChars) {
            pieces.push(rest.slice(0, maxChars));
            rest = rest.slice(maxChars);
        }
        buffer = buffer.length > 0 ? buffer + "\n" + rest : rest;
    }
    if (buffer.length > 0)
        pieces.push(buffer);
    return pieces;
}
/**
 * Split a text blob on paragraph boundaries to fit within a size limit.
 *
 * Paragraphs (separated by one or more blank lines, LF or CRLF) are packed
 * greedily into chunks of at most `maxChars` characters and re-joined with a
 * blank line. A paragraph that is itself longer than `maxChars` — a long
 * table or code sample, for example — is broken up further so that no
 * returned chunk exceeds the limit.
 *
 * @param text     Text to split.
 * @param maxChars Maximum length of each returned chunk. When it is not a
 *                 positive number the text is returned unsplit.
 * @returns Trimmed, non-empty chunks in order. Text that already fits is
 *          returned unchanged as a single element.
 */
function splitByParagraphs(text, maxChars) {
    if (text.length <= maxChars || !(maxChars > 0))
        return [text];
    const chunks = [];
    let current = "";
    const flush = () => {
        const trimmed = current.trim();
        if (trimmed.length > 0) {
            chunks.push(trimmed);
        }
        current = "";
    };
    for (const para of text.split(/(?:\r?\n){2,}/)) {
        for (const piece of hardSplitParagraph(para, maxChars)) {
            // +2 accounts for the blank-line separator added when joining.
            if (current.length > 0 && current.length + piece.length + 2 > maxChars) {
                flush();
            }
            current = current ? current + "\n\n" + piece : piece;
        }
    }
    flush();
    return chunks;
}
|
|
93
|
+
/**
 * Fingerprint a piece of text.
 *
 * @param content Text to hash.
 * @returns The SHA-256 digest of `content` as a lowercase hex string.
 */
function hashContent(content) {
    const hasher = crypto.createHash("sha256");
    hasher.update(content);
    return hasher.digest("hex");
}
|
|
99
|
+
/**
 * Turn one markdown file into a list of searchable chunks.
 *
 * The document is split at H2 headings. A section no longer than
 * MAX_CHUNK_CHARS becomes one chunk; a longer section is split on paragraph
 * boundaries using TARGET_CHUNK_CHARS as the size limit. If that produces no
 * chunk at all and the trimmed document is at least MIN_CHUNK_CHARS long, the
 * document (truncated to MAX_CHUNK_CHARS) is emitted as a single chunk with an
 * empty section title.
 *
 * NOTE(review): pieces shorter than MIN_CHUNK_CHARS are discarded rather than
 * merged into a neighbour, so their text is not indexed unless the
 * whole-document fallback applies.
 *
 * @param doc File record providing `content`, `relative_path` and `hash`.
 * @returns Chunks in document order; `chunk_index` counts from 0 within the file.
 */
export function chunkDocument(doc) {
    const title = extractTitle(doc.content, doc.relative_path);
    const category = extractCategory(doc.relative_path);
    const result = [];
    // Append a chunk; its index is simply the number of chunks emitted so far.
    const emit = (sectionTitle, body) => {
        result.push({
            file_path: doc.relative_path,
            chunk_index: result.length,
            title,
            section_title: sectionTitle,
            category,
            content: body,
            file_hash: doc.hash,
        });
    };
    for (const { title: sectionTitle, content: body } of splitByH2(doc.content)) {
        // Small-enough sections stay whole; oversized ones go paragraph by paragraph.
        const pieces = body.length > MAX_CHUNK_CHARS
            ? splitByParagraphs(body, TARGET_CHUNK_CHARS)
            : [body];
        for (const piece of pieces) {
            if (piece.length < MIN_CHUNK_CHARS)
                continue;
            emit(sectionTitle, piece);
        }
    }
    if (result.length > 0)
        return result;
    // Nothing qualified above: fall back to one chunk for the whole document.
    const whole = doc.content.trim();
    if (whole.length >= MIN_CHUNK_CHARS) {
        emit("", whole.slice(0, MAX_CHUNK_CHARS));
    }
    return result;
}
|
|
156
|
+
/**
 * Recursively collect the markdown files under a directory.
 *
 * Only regular files whose name ends in `.md` are returned. Directories whose
 * name appears in the exclude list are not descended into. Directories that
 * cannot be listed and files that cannot be read are skipped silently
 * (best-effort scan).
 *
 * The root is resolved to an absolute path first, so `absolute_path` is truly
 * absolute even when `dirPath` is relative; `relative_path` is unaffected.
 * Entries are visited in name order so the result does not depend on the
 * order in which the filesystem happens to list them.
 *
 * @param dirPath Directory to scan (absolute, or relative to the working directory).
 * @param options Optional settings. `exclude`: directory names to skip
 *                (default: `_archive`, `node_modules`, `.git`).
 * @returns One `{ absolute_path, relative_path, content, hash }` record per
 *          markdown file, where `hash` is computed from the file content.
 */
export function scanDirectory(dirPath, options = {}) {
    const exclude = options.exclude || ["_archive", "node_modules", ".git"];
    const root = path.resolve(dirPath);
    const files = [];
    function walk(currentPath) {
        let entries;
        try {
            entries = fs.readdirSync(currentPath, { withFileTypes: true });
        }
        catch {
            return; // Permission denied or inaccessible
        }
        // readdir order is filesystem-dependent; sort for reproducible output.
        entries.sort((a, b) => (a.name < b.name ? -1 : a.name > b.name ? 1 : 0));
        for (const entry of entries) {
            const fullPath = path.join(currentPath, entry.name);
            if (entry.isDirectory()) {
                if (!exclude.includes(entry.name)) {
                    walk(fullPath);
                }
            }
            else if (entry.isFile() && entry.name.endsWith(".md")) {
                try {
                    const content = fs.readFileSync(fullPath, "utf-8");
                    const relativePath = path.relative(root, fullPath);
                    files.push({
                        absolute_path: fullPath,
                        relative_path: relativePath,
                        content,
                        hash: hashContent(content),
                    });
                }
                catch {
                    // Skip unreadable files
                }
            }
        }
    }
    walk(root);
    return files;
}
|
|
197
|
+
/**
 * Scan a directory for markdown files and chunk each one.
 *
 * @param dirPath Directory to scan.
 * @param options Optional settings, passed through to `scanDirectory`.
 * @returns `files` — every scanned file record; `chunks` — the chunks of all
 *          files concatenated in file order.
 */
export function chunkDirectory(dirPath, options = {}) {
    const files = scanDirectory(dirPath, options);
    const chunks = files.flatMap((file) => chunkDocument(file));
    return { files, chunks };
}
|
|
208
|
+
//# sourceMappingURL=doc-chunker.js.map
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Document Index — Storage and search for indexed doc chunks
|
|
3
|
+
*
|
|
4
|
+
* Supports two backends:
|
|
5
|
+
* - Free tier: Local JSON file with BM25 keyword search
|
|
6
|
+
* - Pro/dev tier: In-memory vector index with embeddings
|
|
7
|
+
*
|
|
8
|
+
* Follows the same patterns as local-vector-search.ts and local-file-storage.ts
|
|
9
|
+
*/
|
|
10
|
+
import type { DocChunk } from "./doc-chunker.js";
|
|
11
|
+
/**
 * A doc chunk as held in the index: the chunk's own fields plus the
 * bookkeeping added when it was indexed.
 */
export interface IndexedDocChunk {
    /** Identifier of the stored chunk (format is defined by the implementation). */
    id: string;
    /** Path of the source file the chunk came from. */
    file_path: string;
    /** Position of the chunk within its source file. */
    chunk_index: number;
    /** Title of the source document. */
    title: string;
    /** Heading of the section the chunk belongs to. */
    section_title: string;
    /** Category assigned to the source file. */
    category: string;
    /** The chunk's text. */
    content: string;
    /** Hash of the source file at indexing time. */
    file_hash: string;
    /** Project the chunk was indexed under. */
    project: string;
    /** Embedding vector; absent when no embedding was generated for the chunk. */
    embedding?: number[];
    /** When the chunk was indexed — presumably an ISO timestamp; confirm in doc-index.js. */
    indexed_at: string;
}
|
|
24
|
+
/**
 * One hit returned by a doc search: the matching chunk's fields plus its score.
 */
export interface DocSearchResult {
    /** Identifier of the matching indexed chunk. */
    id: string;
    /** Path of the source file, for targeted reading. */
    file_path: string;
    /** Position of the chunk within its source file. */
    chunk_index: number;
    /** Title of the source document. */
    title: string;
    /** Heading of the section the chunk belongs to. */
    section_title: string;
    /** Category assigned to the source file. */
    category: string;
    /** The chunk's text. */
    content: string;
    /**
     * Relevance score for the query. NOTE(review): the scale (embedding
     * similarity vs. BM25-derived) is not visible here — confirm in doc-index.js.
     */
    similarity: number;
    /** Project the chunk was indexed under. */
    project: string;
}
|
|
35
|
+
/**
 * Summary of what the doc index currently holds.
 */
export interface IndexStats {
    /** Number of chunks in the index. */
    total_chunks: number;
    /** Number of distinct source files in the index. */
    total_files: number;
    /** Paths of the indexed files. */
    files_indexed: string[];
    /** Count per category — presumably chunks per category; confirm in doc-index.js. */
    categories: Record<string, number>;
    /** Project these statistics describe. */
    project: string;
    /** Whether embeddings are present in the index. */
    has_embeddings: boolean;
}
|
|
43
|
+
/**
 * Store doc chunks in the index for a project.
 *
 * As described by the module:
 * - old chunks for the same project + file_path are removed first;
 * - embeddings are generated when available (pro/dev tier);
 * - chunks are persisted to a local JSON file;
 * - chunks are loaded into the in-memory vector index when embeddings exist.
 *
 * @param chunks  Chunks to index.
 * @param project Project the chunks belong to.
 * @param options Optional settings. `batchSize` — presumably the number of
 *                chunks processed per embedding batch; confirm in doc-index.js.
 * @returns A promise resolving to three counters: `indexed`, `embedded` and
 *          `errors`.
 */
export declare function indexChunks(chunks: DocChunk[], project: string, options?: { batchSize?: number }): Promise<{ indexed: number; embedded: number; errors: number }>;
|
|
60
|
+
/**
 * Search the indexed docs, using semantic similarity (pro/dev tier) or BM25
 * keyword search (free tier).
 *
 * @param query   Search text.
 * @param options Optional filters: `project` and `category` narrow the search;
 *                `match_count` is presumably the maximum number of results —
 *                confirm in doc-index.js.
 * @returns A promise resolving to the matching chunks with their scores.
 */
export declare function searchDocs(query: string, options?: { project?: string; category?: string; match_count?: number }): Promise<DocSearchResult[]>;
|
|
68
|
+
/**
 * Report statistics about the doc index.
 *
 * @param project Optional project to report on; behaviour when omitted is
 *                defined by the implementation.
 * @returns Chunk/file counts, indexed file paths, per-category counts and
 *          whether embeddings are present.
 */
export declare function getIndexStats(project?: string): IndexStats;
|
|
72
|
+
/**
 * Compare current file hashes with the index to find what needs re-indexing.
 *
 * @param fileHashes Current hashes — presumably keyed by file path with the
 *                   content hash as value; confirm against the caller.
 * @param project    Project whose index is compared against.
 * @returns File lists split into `changed`, `unchanged` and `new_files`.
 */
export declare function getChangedFiles(fileHashes: Map<string, string>, project: string): { changed: string[]; unchanged: string[]; new_files: string[] };
|
|
80
|
+
/**
 * Populate the in-memory doc vector index from local storage.
 * Intended to be called once at startup.
 */
export declare function initDocVectorIndex(): void;
|
|
84
|
+
/**
 * Remove indexed doc chunks, either for one project or for all projects.
 *
 * @param project Project to clear; when omitted the whole index is cleared.
 * @returns A count — presumably the number of chunks removed; confirm in
 *          doc-index.js.
 */
export declare function clearDocIndex(project?: string): number;
|
|
88
|
+
//# sourceMappingURL=doc-index.d.ts.map
|