gitmem-mcp 1.4.4 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/CHANGELOG.md CHANGED
@@ -7,6 +7,24 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
7
7
 
8
8
  ## [Unreleased]
9
9
 
10
+ ## [1.5.1] - 2026-05-13
11
+
12
+ ### Fixed
13
+ - **CI smoke test tool count**: Updated `EXPECTED_TOOL_COUNTS` to reflect `index_docs` and `search_docs` additions (+2 per tier). The 1.5.0 release failed to publish because the smoke test expected 23 free-tier tools but found 25.
14
+
15
+ ## [1.5.0] - 2026-05-11
16
+
17
+ ### Added
18
+ - **`index_docs` tool**: Scan a directory of markdown files, chunk them, and store them in a local doc index for semantic search. Supports incremental indexing (only re-processes changed files), force re-index, and project-scoped indexes. Alias: `gitmem-idx`.
19
+ - **`search_docs` tool**: Search indexed repository documentation using semantic similarity (pro tier) or BM25 keyword search (free tier). Returns relevant chunks with file paths for targeted reading. Alias: `gitmem-sd`.
20
+ - **Citation protocol**: `recall`, `search`, and `prepare_context` now include a citation rule instructing agents to cite record IDs when referencing facts from institutional memory.
21
+ - **Low confidence tagging**: Recall and search results with similarity below 0.55 are tagged `[low confidence]` — historically, 66% of such matches were marked N/A.
22
+ - **Session duration on resume**: `session_start` now shows elapsed session time and loaded scar count when resuming or refreshing an existing session.
23
+
24
+ ### Changed
25
+ - **Quick close hard gate**: `session_close` with `close_type: "quick"` now rejects sessions over 30 minutes, requiring standard close instead.
26
+ - **Standard close recall gate**: `session_close` with `close_type: "standard"` now requires at least one `recall()` call during the session (exemptions: quick close, autonomous agents, sessions with inline reflection).
27
+
10
28
  ## [1.4.4] - 2026-03-31
11
29
 
12
30
  ### Fixed
@@ -57,6 +57,10 @@ export function formatCompact(scars, plan, maxTokens) {
57
57
  lines.push(line);
58
58
  included++;
59
59
  }
60
+ // Citation reminder for sub-agent context (compact — one line)
61
+ if (included > 0) {
62
+ lines.push("Cite record IDs for any factual claims from these scars.");
63
+ }
60
64
  return { payload: lines.join("\n"), included };
61
65
  }
62
66
  /**
package/dist/server.js CHANGED
@@ -36,6 +36,8 @@ import { dismissSuggestion } from "./tools/dismiss-suggestion.js";
36
36
  import { cleanupThreads } from "./tools/cleanup-threads.js";
37
37
  import { archiveLearning } from "./tools/archive-learning.js";
38
38
  import { contributeFeedback } from "./tools/contribute-feedback.js";
39
+ import { indexDocs } from "./tools/index-docs.js";
40
+ import { searchDocsHandler } from "./tools/search-docs.js";
39
41
  import { getCacheStatus, checkCacheHealth, flushCache, startBackgroundInit, } from "./services/startup.js";
40
42
  import { getEffectTracker } from "./services/effect-tracker.js";
41
43
  import { RIPPLE, ANSI } from "./services/display-protocol.js";
@@ -246,6 +248,8 @@ export function createServer() {
246
248
  { alias: "gitmem-al", full: "archive_learning", description: "Archive a scar/win/pattern (is_active=false)" },
247
249
  { alias: "gitmem-graph", full: "graph_traverse", description: "Traverse knowledge graph over institutional memory" },
248
250
  { alias: "gitmem-fb", full: "contribute_feedback", description: "Submit feedback about gitmem (10/session limit)" },
251
+ { alias: "gitmem-idx", full: "index_docs", description: "Index markdown docs for semantic search" },
252
+ { alias: "gitmem-sd", full: "search_docs", description: "Search indexed repository docs" },
249
253
  ];
250
254
  if (hasBatchOperations()) {
251
255
  commands.push({ alias: "gitmem-rsb", full: "record_scar_usage_batch", description: "Track multiple scars (batch)" });
@@ -315,6 +319,15 @@ export function createServer() {
315
319
  case "gm-cache-f":
316
320
  result = await flushCache(toolArgs.project || getProject() || "default");
317
321
  break;
322
+ // Doc indexing and search
323
+ case "index_docs":
324
+ case "gitmem-idx":
325
+ result = await indexDocs(toolArgs);
326
+ break;
327
+ case "search_docs":
328
+ case "gitmem-sd":
329
+ result = await searchDocsHandler(toolArgs);
330
+ break;
318
331
  default:
319
332
  throw new Error(`Unknown tool: ${name}`);
320
333
  }
@@ -0,0 +1,45 @@
1
+ /**
2
+ * Document Chunker — Split markdown files into searchable chunks
3
+ *
4
+ * Strategy:
5
+ * 1. Split on H2 headers first (natural semantic boundaries)
6
+ * 2. If a section exceeds target size, split on paragraph boundaries
7
+ * 3. Each chunk carries metadata: file path, title, category, chunk index
8
+ *
9
+ * Target chunk size: ~600 tokens (~2400 chars); sections up to ~900 tokens (~3600 chars) are kept whole
10
+ */
11
+ export interface DocChunk { // One searchable segment of a markdown file, as produced by chunkDocument
12
+ file_path: string; // Source file path relative to the scanned directory (copied from DocFile.relative_path)
13
+ chunk_index: number; // Zero-based position of this chunk within its file
14
+ title: string; // Document title: first H1 heading, otherwise derived from the file name
15
+ section_title: string; // Text of the enclosing H2 heading; "" for content before the first H2
16
+ category: string; // First directory segment of file_path, or "root" for files at the top level
17
+ content: string; // Chunk text; the H2 heading line itself is not part of it
18
+ file_hash: string; // Hash of the whole source file (copied from DocFile.hash)
19
+ }
20
+ export interface DocFile { // A markdown file loaded from disk, as produced by scanDirectory
21
+ absolute_path: string; // Scan root joined with the file's sub-path; absolute only if the scan root was absolute
22
+ relative_path: string; // Path relative to the directory passed to scanDirectory
23
+ content: string; // File contents decoded as UTF-8
24
+ hash: string; // SHA-256 hex digest of content
25
+ }
26
+ /**
27
+ * Chunk a single markdown file into searchable segments
28
+ */
29
+ export declare function chunkDocument(doc: DocFile): DocChunk[]; // Returns [] when the trimmed file content is under 200 chars
30
+ /**
31
+ * Scan a directory for markdown files
32
+ */
33
+ export declare function scanDirectory(dirPath: string, options?: {
34
+ exclude?: string[]; // Directory names skipped at any depth; defaults to ["_archive", "node_modules", ".git"]
35
+ }): DocFile[]; // Recursive; only files whose name ends in ".md"; unreadable directories and files are silently skipped
36
+ /**
37
+ * Chunk all markdown files in a directory
38
+ */
39
+ export declare function chunkDirectory(dirPath: string, options?: {
40
+ exclude?: string[]; // Forwarded unchanged to scanDirectory
41
+ }): {
42
+ files: DocFile[]; // Every markdown file found by scanDirectory
43
+ chunks: DocChunk[]; // Chunks of all files, concatenated in the same order as files
44
+ };
45
+ //# sourceMappingURL=doc-chunker.d.ts.map
@@ -0,0 +1,208 @@
1
+ /**
2
+ * Document Chunker — Split markdown files into searchable chunks
3
+ *
4
+ * Strategy:
5
+ * 1. Split on H2 headers first (natural semantic boundaries)
6
+ * 2. If a section exceeds target size, split on paragraph boundaries
7
+ * 3. Each chunk carries metadata: file path, title, category, chunk index
8
+ *
9
+ * Target chunk size: ~600 tokens (~2400 chars); sections up to ~900 tokens (~3600 chars) are kept whole
10
+ */
11
+ import * as fs from "fs";
12
+ import * as path from "path";
13
+ import * as crypto from "crypto";
14
+ const TARGET_CHUNK_CHARS = 2400; // ~600 tokens; size budget used when an oversized section is split by paragraphs
15
+ const MAX_CHUNK_CHARS = 3600; // ~900 tokens; sections at or below this size are kept as a single chunk
16
+ const MIN_CHUNK_CHARS = 200; // Don't create tiny chunks: sections/sub-chunks shorter than this are not emitted
17
/**
 * Derive a human-readable title for a markdown document.
 *
 * Returns the text of the first H1 heading (`# Title`) found outside a fenced
 * code block. Lines inside backtick or tilde fences are ignored so that a shell
 * or Python comment such as `# install deps` is not mistaken for the document
 * title. Headings are matched one line at a time, so a bare `#` can no longer
 * pick up text from a following line.
 *
 * When no H1 is found, falls back to the file name without its `.md`
 * extension, with `-` and `_` replaced by spaces.
 *
 * @param content  Raw markdown text (LF or CRLF line endings).
 * @param filePath Path of the document; only its base name is used.
 * @returns The trimmed H1 text, or the prettified file name.
 */
function extractTitle(content, filePath) {
    // Holds the opening fence marker ("```" or "~~~") while inside a code block.
    let fenceMarker = null;
    for (const line of content.split(/\r?\n/)) {
        const fence = line.match(/^\s*(```|~~~)/);
        if (fence) {
            if (fenceMarker === null) {
                fenceMarker = fence[1];
            }
            else if (fence[1] === fenceMarker) {
                fenceMarker = null;
            }
            continue;
        }
        if (fenceMarker !== null) {
            continue; // Inside a code block: "# ..." is code, not a heading
        }
        const h1Match = line.match(/^#\s+(.+)$/);
        if (h1Match) {
            return h1Match[1].trim();
        }
    }
    // Fall back to filename without extension
    return path.basename(filePath, ".md").replace(/[-_]/g, " ");
}
27
/**
 * Derive a category label from a document's relative path.
 *
 * The category is the first path segment (the top-level directory). A path
 * with no directory component yields "root".
 *
 * @param relativePath Path that is split on the platform separator (`path.sep`).
 * @returns The leading directory name, or "root".
 */
function extractCategory(relativePath) {
    const [topLevel, ...remainder] = relativePath.split(path.sep);
    // More than one segment means the file lives inside at least one directory.
    return remainder.length > 0 ? topLevel : "root";
}
36
/**
 * Split markdown into sections at H2 (`## Title`) headings.
 *
 * Each section records the heading text as `title` and the trimmed text that
 * follows it as `content`; the heading line itself is not part of the content.
 * Text before the first H2 becomes a section with an empty title. Sections
 * whose content is empty after trimming are dropped.
 *
 * Both LF and CRLF line endings are accepted; section content is re-joined
 * with "\n". Previously a trailing "\r" on each line stopped the heading
 * pattern from matching, so CRLF files were never split. Lines inside fenced
 * code blocks (backtick or tilde fences) are never treated as headings.
 *
 * @param content Raw markdown text.
 * @returns Array of `{ title, content }` objects in document order.
 */
function splitByH2(content) {
    const sections = [];
    let currentTitle = "";
    let currentLines = [];
    // Holds the opening fence marker ("```" or "~~~") while inside a code block.
    let fenceMarker = null;
    // Save the section collected so far, if it has any non-blank text.
    const flush = () => {
        const text = currentLines.join("\n").trim();
        if (text.length > 0) {
            sections.push({ title: currentTitle, content: text });
        }
    };
    for (const line of content.split(/\r?\n/)) {
        const fence = line.match(/^\s*(```|~~~)/);
        if (fence) {
            if (fenceMarker === null) {
                fenceMarker = fence[1];
            }
            else if (fence[1] === fenceMarker) {
                fenceMarker = null;
            }
        }
        // Only look for a heading on ordinary lines outside code fences.
        const h2Match = !fence && fenceMarker === null ? line.match(/^##\s+(.+)$/) : null;
        if (h2Match) {
            flush();
            currentTitle = h2Match[1].trim();
            currentLines = [];
        }
        else {
            currentLines.push(line);
        }
    }
    // Don't forget the last section
    flush();
    return sections;
}
70
/**
 * Split a text blob on paragraph boundaries so each piece fits a size budget.
 *
 * Text that already fits is returned unchanged as a single-element array.
 * Otherwise paragraphs (separated by one or more blank lines, LF or CRLF) are
 * packed greedily: a paragraph is appended to the current chunk, joined with
 * "\n\n", unless that would push the chunk past `maxChars`, in which case the
 * current chunk is emitted and a new one is started.
 *
 * Previously only "\n\n" was recognised as a paragraph break, so CRLF text was
 * never split.
 *
 * NOTE: a single paragraph longer than `maxChars` is never broken up, so a
 * returned chunk can still exceed `maxChars`.
 *
 * @param text     Text to split.
 * @param maxChars Size budget per chunk, in characters.
 * @returns Trimmed, non-empty chunks in original order.
 */
function splitByParagraphs(text, maxChars) {
    if (text.length <= maxChars) {
        return [text];
    }
    const chunks = [];
    let current = "";
    for (const para of text.split(/(?:\r?\n){2,}/)) {
        // +2 accounts for the "\n\n" separator that would join the paragraph on.
        if (current.length > 0 && current.length + para.length + 2 > maxChars) {
            chunks.push(current.trim());
            current = para;
        }
        else {
            current = current ? current + "\n\n" + para : para;
        }
    }
    if (current.trim().length > 0) {
        chunks.push(current.trim());
    }
    return chunks;
}
93
/**
 * Fingerprint a piece of content.
 *
 * @param content Data to hash.
 * @returns The SHA-256 digest as a lowercase hex string.
 */
function hashContent(content) {
    const hasher = crypto.createHash("sha256");
    hasher.update(content);
    return hasher.digest("hex");
}
99
/**
 * Chunk a single markdown file into searchable segments.
 *
 * The document is split at H2 headings. A section no longer than
 * MAX_CHUNK_CHARS becomes one chunk; a longer section is split on paragraph
 * boundaries using TARGET_CHUNK_CHARS as the budget. Pieces shorter than
 * MIN_CHUNK_CHARS are not emitted. If nothing was emitted but the trimmed
 * document is at least MIN_CHUNK_CHARS long, the whole document (truncated to
 * MAX_CHUNK_CHARS) becomes a single chunk with an empty section title.
 *
 * NOTE(review): paragraph splitting never breaks a single paragraph, so a
 * chunk can exceed MAX_CHUNK_CHARS when one paragraph is that long.
 *
 * @param doc File record providing `content`, `relative_path` and `hash`.
 * @returns Chunks in document order, with `chunk_index` counting from 0.
 */
export function chunkDocument(doc) {
    const title = extractTitle(doc.content, doc.relative_path);
    const category = extractCategory(doc.relative_path);
    const chunks = [];
    // Append one chunk; its index is its position in the output array.
    const emit = (sectionTitle, content) => {
        chunks.push({
            file_path: doc.relative_path,
            chunk_index: chunks.length,
            title,
            section_title: sectionTitle,
            category,
            content,
            file_hash: doc.hash,
        });
    };
    for (const section of splitByH2(doc.content)) {
        // Sections that fit are used as-is; oversized ones are split by paragraphs.
        const pieces = section.content.length <= MAX_CHUNK_CHARS
            ? [section.content]
            : splitByParagraphs(section.content, TARGET_CHUNK_CHARS);
        for (const piece of pieces) {
            if (piece.length >= MIN_CHUNK_CHARS) {
                emit(section.title, piece);
            }
        }
    }
    // Fallback: every piece was too small on its own, but the file as a whole is not.
    if (chunks.length === 0) {
        const whole = doc.content.trim();
        if (whole.length >= MIN_CHUNK_CHARS) {
            emit("", whole.slice(0, MAX_CHUNK_CHARS));
        }
    }
    return chunks;
}
156
/**
 * Recursively collect the markdown files under a directory.
 *
 * Directories whose name appears in `options.exclude` (default: "_archive",
 * "node_modules", ".git") are not descended into. Only regular files whose
 * name ends in ".md" are read. Directories that cannot be listed and files
 * that cannot be read are skipped without raising an error.
 *
 * @param dirPath Root directory to scan; `relative_path` values are relative to it.
 * @param options Optional settings; `exclude` lists directory names to skip.
 * @returns One record per markdown file: paths, UTF-8 content, and content hash.
 */
export function scanDirectory(dirPath, options = {}) {
    const exclude = options.exclude || ["_archive", "node_modules", ".git"];
    const files = [];
    function visit(currentPath) {
        let entries;
        try {
            entries = fs.readdirSync(currentPath, { withFileTypes: true });
        }
        catch {
            return; // Permission denied or inaccessible
        }
        for (const entry of entries) {
            const fullPath = path.join(currentPath, entry.name);
            if (entry.isDirectory()) {
                if (!exclude.includes(entry.name)) {
                    visit(fullPath);
                }
                continue;
            }
            if (!entry.isFile() || !entry.name.endsWith(".md")) {
                continue;
            }
            try {
                const content = fs.readFileSync(fullPath, "utf-8");
                files.push({
                    absolute_path: fullPath,
                    relative_path: path.relative(dirPath, fullPath),
                    content,
                    hash: hashContent(content),
                });
            }
            catch {
                // Skip unreadable files
            }
        }
    }
    visit(dirPath);
    return files;
}
197
/**
 * Scan a directory for markdown files and chunk every file found.
 *
 * @param dirPath Root directory to scan.
 * @param options Passed through to scanDirectory (e.g. `exclude`).
 * @returns `files`: the scanned file records; `chunks`: the chunks of all
 *          files, concatenated in file order.
 */
export function chunkDirectory(dirPath, options = {}) {
    const files = scanDirectory(dirPath, options);
    const chunks = files.flatMap((file) => chunkDocument(file));
    return { files, chunks };
}
208
+ //# sourceMappingURL=doc-chunker.js.map
@@ -0,0 +1,88 @@
1
+ /**
2
+ * Document Index — Storage and search for indexed doc chunks
3
+ *
4
+ * Supports two backends:
5
+ * - Free tier: Local JSON file with BM25 keyword search
6
+ * - Pro/dev tier: In-memory vector index with embeddings
7
+ *
8
+ * Follows the same patterns as local-vector-search.ts and local-file-storage.ts
9
+ */
10
+ import type { DocChunk } from "./doc-chunker.js";
11
+ export interface IndexedDocChunk { // Stored form of a chunk: the DocChunk fields plus id, project, embedding and indexed_at
12
+ id: string;
13
+ file_path: string;
14
+ chunk_index: number;
15
+ title: string;
16
+ section_title: string;
17
+ category: string;
18
+ content: string;
19
+ file_hash: string;
20
+ project: string; // presumably the project passed to indexChunks — TODO confirm
21
+ embedding?: number[]; // Optional; per the indexChunks docs, embeddings are generated only when available (pro/dev tier)
22
+ indexed_at: string; // presumably an ISO-8601 timestamp — TODO confirm
23
+ }
24
+ export interface DocSearchResult { // One hit returned by searchDocs
25
+ id: string;
26
+ file_path: string;
27
+ chunk_index: number;
28
+ title: string;
29
+ section_title: string;
30
+ category: string;
31
+ content: string;
32
+ similarity: number; // Relevance score; NOTE(review): scale may differ between semantic and BM25 search — confirm
33
+ project: string;
34
+ }
35
+ export interface IndexStats { // Summary returned by getIndexStats
36
+ total_chunks: number;
37
+ total_files: number;
38
+ files_indexed: string[]; // presumably the file_path values present in the index — TODO confirm
39
+ categories: Record<string, number>; // presumably a count per category; whether chunks or files are counted is not visible here
40
+ project: string;
41
+ has_embeddings: boolean; // presumably true when stored chunks carry embeddings — TODO confirm
42
+ }
43
+ /**
44
+ * Index doc chunks into storage.
45
+ *
46
+ * - Removes old chunks for the same project + file_path
47
+ * - Generates embeddings if available (pro/dev tier)
48
+ * - Stores to local JSON file
49
+ * - Loads into in-memory vector index if embeddings present
50
+ *
51
+ * Returns count of chunks indexed.
52
+ */
53
+ export declare function indexChunks(chunks: DocChunk[], project: string, options?: {
54
+ batchSize?: number; // presumably the embedding batch size; default not visible here — TODO confirm
55
+ }): Promise<{
56
+ indexed: number; // Count of chunks indexed (per the description above)
57
+ embedded: number; // presumably chunks that received an embedding — TODO confirm
58
+ errors: number; // presumably chunks that failed to embed or store — TODO confirm
59
+ }>;
60
+ /**
61
+ * Search indexed docs using semantic similarity (pro/dev) or BM25 (free)
62
+ */
63
+ export declare function searchDocs(query: string, options?: {
64
+ project?: string; // presumably restricts results to one project — TODO confirm
65
+ category?: string; // presumably restricts results to one category — TODO confirm
66
+ match_count?: number; // presumably the maximum number of results; default not visible here
67
+ }): Promise<DocSearchResult[]>;
68
+ /**
69
+ * Get index statistics
70
+ */
71
+ export declare function getIndexStats(project?: string): IndexStats; // NOTE(review): behavior when project is omitted is not visible here
72
+ /**
73
+ * Check which files have changed since last index (by hash)
74
+ */
75
+ export declare function getChangedFiles(fileHashes: Map<string, string>, project: string): { // fileHashes: presumably file path -> content hash — TODO confirm
76
+ changed: string[]; // presumably indexed files whose hash differs
77
+ unchanged: string[]; // presumably indexed files whose hash matches
78
+ new_files: string[]; // presumably files with no existing index entry
79
+ };
80
+ /**
81
+ * Initialize vector index from local storage on startup
82
+ */
83
+ export declare function initDocVectorIndex(): void; // Synchronous: returns void rather than a Promise
84
+ /**
85
+ * Clear the doc index for a project (or all)
86
+ */
87
+ export declare function clearDocIndex(project?: string): number; // presumably returns the number of chunks removed — TODO confirm
88
+ //# sourceMappingURL=doc-index.d.ts.map