@mhalder/qdrant-mcp-server 2.1.2 → 3.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +21 -0
- package/README.md +78 -3
- package/build/code/indexer.d.ts +2 -1
- package/build/code/indexer.d.ts.map +1 -1
- package/build/code/indexer.js +58 -25
- package/build/code/indexer.js.map +1 -1
- package/build/git/chunker.d.ts +39 -0
- package/build/git/chunker.d.ts.map +1 -0
- package/build/git/chunker.js +210 -0
- package/build/git/chunker.js.map +1 -0
- package/build/git/chunker.test.d.ts +2 -0
- package/build/git/chunker.test.d.ts.map +1 -0
- package/build/git/chunker.test.js +230 -0
- package/build/git/chunker.test.js.map +1 -0
- package/build/git/config.d.ts +34 -0
- package/build/git/config.d.ts.map +1 -0
- package/build/git/config.js +163 -0
- package/build/git/config.js.map +1 -0
- package/build/git/extractor.d.ts +57 -0
- package/build/git/extractor.d.ts.map +1 -0
- package/build/git/extractor.integration.test.d.ts +6 -0
- package/build/git/extractor.integration.test.d.ts.map +1 -0
- package/build/git/extractor.integration.test.js +166 -0
- package/build/git/extractor.integration.test.js.map +1 -0
- package/build/git/extractor.js +231 -0
- package/build/git/extractor.js.map +1 -0
- package/build/git/extractor.test.d.ts +2 -0
- package/build/git/extractor.test.d.ts.map +1 -0
- package/build/git/extractor.test.js +267 -0
- package/build/git/extractor.test.js.map +1 -0
- package/build/git/index.d.ts +10 -0
- package/build/git/index.d.ts.map +1 -0
- package/build/git/index.js +11 -0
- package/build/git/index.js.map +1 -0
- package/build/git/indexer.d.ts +50 -0
- package/build/git/indexer.d.ts.map +1 -0
- package/build/git/indexer.js +588 -0
- package/build/git/indexer.js.map +1 -0
- package/build/git/indexer.test.d.ts +2 -0
- package/build/git/indexer.test.d.ts.map +1 -0
- package/build/git/indexer.test.js +867 -0
- package/build/git/indexer.test.js.map +1 -0
- package/build/git/sync/synchronizer.d.ts +43 -0
- package/build/git/sync/synchronizer.d.ts.map +1 -0
- package/build/git/sync/synchronizer.js +108 -0
- package/build/git/sync/synchronizer.js.map +1 -0
- package/build/git/sync/synchronizer.test.d.ts +2 -0
- package/build/git/sync/synchronizer.test.d.ts.map +1 -0
- package/build/git/sync/synchronizer.test.js +188 -0
- package/build/git/sync/synchronizer.test.js.map +1 -0
- package/build/git/types.d.ts +159 -0
- package/build/git/types.d.ts.map +1 -0
- package/build/git/types.js +5 -0
- package/build/git/types.js.map +1 -0
- package/build/index.js +18 -0
- package/build/index.js.map +1 -1
- package/build/qdrant/client.d.ts +5 -0
- package/build/qdrant/client.d.ts.map +1 -1
- package/build/qdrant/client.js +10 -0
- package/build/qdrant/client.js.map +1 -1
- package/build/qdrant/client.test.js +25 -0
- package/build/qdrant/client.test.js.map +1 -1
- package/build/tools/git-history.d.ts +10 -0
- package/build/tools/git-history.d.ts.map +1 -0
- package/build/tools/git-history.js +144 -0
- package/build/tools/git-history.js.map +1 -0
- package/build/tools/index.d.ts +2 -0
- package/build/tools/index.d.ts.map +1 -1
- package/build/tools/index.js +4 -0
- package/build/tools/index.js.map +1 -1
- package/build/tools/schemas.d.ts +24 -0
- package/build/tools/schemas.d.ts.map +1 -1
- package/build/tools/schemas.js +64 -0
- package/build/tools/schemas.js.map +1 -1
- package/package.json +1 -1
- package/src/code/indexer.ts +73 -24
- package/src/git/chunker.test.ts +284 -0
- package/src/git/chunker.ts +256 -0
- package/src/git/config.ts +173 -0
- package/src/git/extractor.integration.test.ts +221 -0
- package/src/git/extractor.test.ts +403 -0
- package/src/git/extractor.ts +284 -0
- package/src/git/index.ts +31 -0
- package/src/git/indexer.test.ts +1089 -0
- package/src/git/indexer.ts +745 -0
- package/src/git/sync/synchronizer.test.ts +250 -0
- package/src/git/sync/synchronizer.ts +122 -0
- package/src/git/types.ts +192 -0
- package/src/index.ts +42 -0
- package/src/qdrant/client.test.ts +29 -0
- package/src/qdrant/client.ts +14 -0
- package/src/tools/git-history.ts +208 -0
- package/src/tools/index.ts +7 -0
- package/src/tools/schemas.ts +75 -0
- package/tests/code/chunker/tree-sitter-chunker.test.ts +87 -5
- package/tests/code/indexer.test.ts +121 -0
- package/tests/code/integration.test.ts +14 -0
- package/tests/code/scanner.test.ts +81 -6
- package/tests/code/sync/snapshot.test.ts +55 -4
- package/tests/code/sync/synchronizer.test.ts +86 -10
- package/vitest.config.ts +2 -0
|
@@ -0,0 +1,256 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CommitChunker - Create embeddable chunks from git commits
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import { createHash } from "node:crypto";
|
|
6
|
+
import { COMMIT_TYPE_PATTERNS } from "./config.js";
|
|
7
|
+
import type { CommitChunk, CommitType, GitConfig, RawCommit } from "./types.js";
|
|
8
|
+
|
|
9
|
+
/**
 * Builds embeddable text chunks (content plus metadata) from raw git commits.
 *
 * Exactly one chunk is produced per commit. When the formatted text is longer
 * than `config.maxChunkSize`, a condensed summary is emitted instead.
 */
export class CommitChunker {
  constructor(private config: GitConfig) {}

  /**
   * Classify a commit from its subject and body.
   *
   * Matching happens in two passes over `COMMIT_TYPE_PATTERNS`:
   *  1. anchored patterns (source starting with `^`), i.e. explicit
   *     conventional-commit prefixes such as `fix:` or `feat(scope)!:`;
   *  2. every pattern, in declaration order (keyword heuristics).
   *
   * The first pass exists so that an explicit prefix is never overridden by a
   * keyword heuristic of an earlier type (e.g. `fix: implement retry` must be
   * `fix`, not `feat` because of the word "implement").
   *
   * @param commit - Commit whose `subject` and `body` are inspected.
   * @returns The first matching type, or `"other"` when nothing matches.
   */
  classifyCommitType(commit: RawCommit): CommitType {
    // Patterns without the `i` flag are also tried against a lowercased copy
    // of the full message so that `Fix: ...` still counts as a prefix match.
    const message = `${commit.subject} ${commit.body}`.toLowerCase();
    const matches = (pattern: RegExp): boolean =>
      pattern.test(commit.subject) || pattern.test(message);

    // Pass 1: explicit conventional-commit prefixes win.
    for (const { type, patterns } of COMMIT_TYPE_PATTERNS) {
      if (patterns.some((p: RegExp) => p.source.startsWith("^") && matches(p))) {
        return type;
      }
    }

    // Pass 2: keyword heuristics, first match in declaration order wins.
    for (const { type, patterns } of COMMIT_TYPE_PATTERNS) {
      if (patterns.some((p: RegExp) => matches(p))) {
        return type;
      }
    }

    return "other";
  }

  /**
   * Create embeddable chunks from a commit.
   *
   * Currently always returns a single-element array; the array return type
   * leaves room for splitting very large commits later.
   *
   * @param commit - Commit to convert.
   * @param repoPath - Repository path stored in the chunk metadata.
   * @param diff - Optional `git show`-style output used for the diff preview.
   * @returns One chunk holding the formatted (or condensed) text and metadata.
   */
  createChunks(
    commit: RawCommit,
    repoPath: string,
    diff?: string,
  ): CommitChunk[] {
    const commitType = this.classifyCommitType(commit);
    const fullContent = this.formatChunkContent(commit, commitType, diff);

    // Oversized text is replaced by a condensed summary (no diff preview).
    const content =
      fullContent.length > this.config.maxChunkSize
        ? this.truncateContent(fullContent, commit, commitType)
        : fullContent;

    return [
      {
        content,
        metadata: this.createMetadata(commit, commitType, repoPath),
      },
    ];
  }

  /**
   * Generate a deterministic chunk ID.
   *
   * @param chunk - Chunk whose `metadata.commitHash` and `metadata.repoPath`
   *   identify it.
   * @returns `gitcommit_` followed by the first 16 hex characters of the
   *   SHA-256 of `"<commitHash>:<repoPath>"`.
   */
  generateChunkId(chunk: CommitChunk): string {
    const data = `${chunk.metadata.commitHash}:${chunk.metadata.repoPath}`;
    const hash = createHash("sha256").update(data).digest("hex");
    return `gitcommit_${hash.substring(0, 16)}`;
  }

  /**
   * Lines shared by the full and the condensed chunk text: commit id, type,
   * author, date (YYYY-MM-DD) and subject, each group followed by a blank line.
   */
  private formatHeader(commit: RawCommit, commitType: CommitType): string[] {
    return [
      `Commit: ${commit.shortHash}`,
      `Type: ${commitType}`,
      `Author: ${commit.author}`,
      `Date: ${commit.date.toISOString().split("T")[0]}`,
      "",
      `Subject: ${commit.subject}`,
      "",
    ];
  }

  /**
   * "Files changed" section listing at most `limit` paths, followed by a
   * "... and N more files" line when the list was cut, then a blank line.
   */
  private formatFileList(files: string[], limit: number): string[] {
    const section = [`Files changed (${files.length}):`];
    for (const file of files.slice(0, limit)) {
      section.push(` - ${file}`);
    }
    if (files.length > limit) {
      section.push(` ... and ${files.length - limit} more files`);
    }
    section.push("");
    return section;
  }

  /**
   * Format the full chunk text for embedding: header, optional description,
   * optional file list (up to 20 files, when `config.includeFileList`),
   * change stats and optional diff preview (when `config.includeDiff` and a
   * diff was supplied).
   */
  private formatChunkContent(
    commit: RawCommit,
    commitType: CommitType,
    diff?: string,
  ): string {
    const lines = this.formatHeader(commit, commitType);

    const body = commit.body.trim();
    if (body) {
      lines.push("Description:", body, "");
    }

    if (this.config.includeFileList && commit.files.length > 0) {
      lines.push(...this.formatFileList(commit.files, 20));
    }

    lines.push(`Changes: +${commit.insertions} -${commit.deletions}`);

    if (this.config.includeDiff && diff) {
      lines.push("", "Diff preview:", this.extractDiffPreview(diff));
    }

    return lines.join("\n");
  }

  /**
   * Extract a readable preview (at most 50 lines) from `git show`-style output.
   *
   * Everything before the first `diff --git` line is discarded. From the rest,
   * hunk headers (`@@ ...`) and added/removed lines are kept. A
   * `--- <file> ---` marker precedes every hunk header except when it would
   * be the very first preview line.
   *
   * Per-file headers (`--- a/...`, `+++ b/...`) are skipped by position: they
   * only occur between a `diff --git` line and the first hunk of that file.
   * Filtering them by their `---`/`+++` prefix instead would also drop real
   * changes whose content starts with `--` or `++` (e.g. a removed `--count;`).
   *
   * @returns The preview text, or `""` when no `diff --git` line is present.
   */
  private extractDiffPreview(diff: string): string {
    const diffStart = diff.indexOf("diff --git");
    if (diffStart === -1) return "";

    const changeLines: string[] = [];
    let currentFile = "";
    let inHunk = false;

    for (const line of diff.substring(diffStart).split("\n")) {
      if (line.startsWith("diff --git")) {
        // New file section: header lines follow until the first hunk.
        inHunk = false;
        const match = line.match(/diff --git a\/.+ b\/(.+)/);
        if (match) {
          currentFile = match[1];
        }
      } else if (line.startsWith("@@")) {
        inHunk = true;
        if (currentFile && changeLines.length > 0) {
          changeLines.push(`--- ${currentFile} ---`);
        }
        changeLines.push(line);
      } else if (inHunk && (line.startsWith("+") || line.startsWith("-"))) {
        changeLines.push(line);
      }
    }

    const maxPreviewLines = 50;
    if (changeLines.length > maxPreviewLines) {
      return (
        changeLines.slice(0, maxPreviewLines).join("\n") +
        `\n... (${changeLines.length - maxPreviewLines} more lines)`
      );
    }

    return changeLines.join("\n");
  }

  /**
   * Build the condensed text used when the full text exceeds
   * `config.maxChunkSize`: header, description capped at 500 characters,
   * file list capped at 10 files (only when `config.includeFileList`, to stay
   * consistent with the full format), change stats and a truncation marker.
   *
   * Note: the result is rebuilt from the commit and is not itself clamped to
   * `maxChunkSize`; a very long subject or long file paths can still exceed it.
   *
   * @param content - The oversized full text. Not used by the current
   *   implementation; kept so the signature stays unchanged.
   */
  private truncateContent(
    content: string,
    commit: RawCommit,
    commitType: CommitType,
  ): string {
    const essentialLines = this.formatHeader(commit, commitType);

    const body = commit.body.trim();
    if (body) {
      const maxBodyLength = 500;
      const shownBody =
        body.length > maxBodyLength
          ? body.substring(0, maxBodyLength) + "..."
          : body;
      essentialLines.push("Description:", shownBody, "");
    }

    if (this.config.includeFileList && commit.files.length > 0) {
      essentialLines.push(...this.formatFileList(commit.files, 10));
    }

    essentialLines.push(
      `Changes: +${commit.insertions} -${commit.deletions}`,
      "",
      "[content truncated due to size]",
    );

    return essentialLines.join("\n");
  }

  /**
   * Create the metadata object stored alongside a chunk. Fields are copied
   * from the commit; `date` is serialized as a full ISO-8601 string.
   */
  private createMetadata(
    commit: RawCommit,
    commitType: CommitType,
    repoPath: string,
  ): CommitChunk["metadata"] {
    return {
      commitHash: commit.hash,
      shortHash: commit.shortHash,
      author: commit.author,
      authorEmail: commit.authorEmail,
      date: commit.date.toISOString(),
      subject: commit.subject,
      commitType,
      files: commit.files,
      insertions: commit.insertions,
      deletions: commit.deletions,
      repoPath,
    };
  }
}
|
|
@@ -0,0 +1,173 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Default configuration and constants for git history indexing
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import type { CommitType, GitConfig } from "./types.js";
|
|
6
|
+
|
|
7
|
+
/**
 * Baseline settings for git history indexing.
 * Callers can spread this object and override individual fields.
 */
export const DEFAULT_GIT_CONFIG: GitConfig = {
  // Upper bound on the number of commits processed.
  maxCommits: 5000,
  // Whether chunk text lists the files touched by the commit.
  includeFileList: true,
  // Whether chunk text carries a preview of the commit diff.
  includeDiff: true,
  // Cap on diff size per commit (about 5KB).
  maxDiffSize: 5000,
  // Timeout for git commands: five minutes.
  gitTimeout: 5 * 60 * 1000,
  // Chunk text longer than this many characters is condensed.
  maxChunkSize: 3000,
  batchSize: 100,
  // A failed batch is retried up to this many times.
  batchRetryAttempts: 3,
  defaultSearchLimit: 10,
  enableHybridSearch: true,
};
|
|
22
|
+
|
|
23
|
+
/**
 * Builds the anchored conventional-commit prefix matcher for `keyword`:
 * the keyword at the start of the text, an optional `(scope)`, then `!` or `:`.
 * `keyword` is inserted verbatim, so it may itself be a regex fragment
 * (e.g. `docs?`).
 */
const conventionalPrefix = (keyword: string): RegExp =>
  new RegExp(`^${keyword}(\\(.+\\))?[!:]`);

/**
 * Commit classification table, loosely following conventional commits.
 *
 * Each entry pairs a commit type with the regular expressions that signal it:
 * anchored prefix matchers first, then free-text keyword heuristics.
 * Entry order is significant because consumers stop at the first match.
 */
export const COMMIT_TYPE_PATTERNS: {
  type: CommitType;
  patterns: RegExp[];
}[] = [
  {
    type: "feat",
    patterns: [
      conventionalPrefix("feat"), // feat: or feat(scope):
      conventionalPrefix("feature"),
      /\badd(ed|s|ing)?\b.*\b(feature|functionality|support)/i,
      /\bimplement(ed|s|ing)?\b/i,
      /\bnew\b.*\b(feature|functionality)/i,
    ],
  },
  {
    type: "fix",
    patterns: [
      conventionalPrefix("fix"),
      conventionalPrefix("bugfix"),
      conventionalPrefix("hotfix"),
      /\bfix(ed|es|ing)?\b.*\b(bug|issue|problem|error)/i,
      /\bresolve[ds]?\b.*\b(issue|bug|problem)/i,
      /\bcorrect(ed|s|ing)?\b/i,
    ],
  },
  {
    type: "refactor",
    patterns: [
      conventionalPrefix("refactor"),
      /\brefactor(ed|s|ing)?\b/i,
      /\brestructur(ed|es|ing)?\b/i,
      /\breorganiz(ed|es|ing)?\b/i,
      /\bclean(ed|s|ing)?\s*up\b/i,
    ],
  },
  {
    type: "docs",
    patterns: [
      conventionalPrefix("docs?"),
      /\bdocument(ed|s|ing|ation)?\b/i,
      /\breadme\b/i,
      /\bchangelog\b/i,
      /\bcomments?\b/i,
      /\bjsdoc\b/i,
      /\btypedoc\b/i,
    ],
  },
  {
    type: "test",
    patterns: [
      conventionalPrefix("test"),
      conventionalPrefix("tests?"),
      /\btest(ed|s|ing)?\b/i,
      /\bspec(s)?\b/i,
      /\bcoverage\b/i,
      /\bunit\s*test/i,
      /\bintegration\s*test/i,
      /\be2e\b/i,
    ],
  },
  {
    type: "chore",
    patterns: [
      conventionalPrefix("chore"),
      /\bchore\b/i,
      /\bmaintenance\b/i,
      /\bdependenc(y|ies)\b/i,
      /\bbump(ed|s|ing)?\b.*\bversion/i,
      /\bupgrade[ds]?\b/i,
      /\bupdate[ds]?\b.*\b(dep|package|lock)/i,
    ],
  },
  {
    type: "style",
    patterns: [
      conventionalPrefix("style"),
      /\bformat(ted|s|ting)?\b/i,
      /\blint(ed|s|ing)?\b/i,
      /\bprettier\b/i,
      /\beslint\b/i,
      /\bwhitespace\b/i,
      /\bindentation\b/i,
    ],
  },
  {
    type: "perf",
    patterns: [
      conventionalPrefix("perf"),
      conventionalPrefix("performance"),
      /\bperformance\b/i,
      /\boptimiz(ed|es|ing|ation)?\b/i,
      /\bspeed\s*up\b/i,
      /\bfaster\b/i,
      /\bcach(e|ed|ing)\b/i,
    ],
  },
  {
    type: "build",
    patterns: [
      conventionalPrefix("build"),
      /\bbuild\b/i,
      /\bwebpack\b/i,
      /\brollup\b/i,
      /\bvite\b/i,
      /\bbundl(e|ed|er|ing)\b/i,
      /\bcompil(e|ed|er|ing|ation)\b/i,
    ],
  },
  {
    type: "ci",
    patterns: [
      conventionalPrefix("ci"),
      /\bci\b/i,
      /\bgithub\s*actions?\b/i,
      /\bworkflow\b/i,
      /\bpipeline\b/i,
      /\btravis\b/i,
      /\bcircle\s*ci\b/i,
      /\bjenkins\b/i,
    ],
  },
  {
    type: "revert",
    patterns: [conventionalPrefix("revert"), /\brevert(ed|s|ing)?\b/i],
  },
];
|
|
153
|
+
|
|
154
|
+
/**
 * `git log` pretty-format string that emits one record per commit with the
 * fields, in order: hash | shortHash | author | authorEmail | date | subject | body.
 *
 * NOTE(review): `|` is also the field separator, and it can legitimately occur
 * in author names, subjects and bodies. Confirm the parser limits its split to
 * the leading fields.
 */
export const GIT_LOG_FORMAT = "%H|%h|%an|%ae|%aI|%s|%b";

/**
 * Marker string that separates one commit record from the next in git log output.
 */
export const GIT_LOG_COMMIT_DELIMITER = "---COMMIT_DELIMITER---";

/**
 * Upper bound, in bytes, on buffered output from git operations (50MB).
 */
export const GIT_MAX_BUFFER = 50 * 1024 ** 2;

/**
 * ID reserved for the record that stores indexing metadata in the collection.
 */
export const GIT_INDEXING_METADATA_ID = "__git_indexing_metadata__";
|
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Integration tests for GitExtractor
|
|
3
|
+
* These tests run against real git repositories (not mocked)
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
import { describe, it, expect, beforeAll } from "vitest";
|
|
7
|
+
import { GitExtractor } from "./extractor.js";
|
|
8
|
+
import { DEFAULT_GIT_CONFIG } from "./config.js";
|
|
9
|
+
import type { GitConfig } from "./types.js";
|
|
10
|
+
import { execFile } from "node:child_process";
|
|
11
|
+
import { promisify } from "node:util";
|
|
12
|
+
|
|
13
|
+
const execFileAsync = promisify(execFile);

/**
 * Integration suite for GitExtractor, run against the repository that contains
 * the current working directory. Expected values are taken from plain `git`
 * commands rather than hard-coded, so the suite does not depend on the shape
 * of the history (merge commits, short histories).
 */
describe("GitExtractor Integration Tests", () => {
  let extractor: GitExtractor;
  const config: GitConfig = { ...DEFAULT_GIT_CONFIG, maxCommits: 100 };

  // Use the current repository for integration tests
  const repoPath = process.cwd();

  /** Hashes reachable from HEAD but not from `sinceHash`, as reported by git. */
  const revListSince = async (sinceHash: string): Promise<string[]> => {
    const { stdout } = await execFileAsync(
      "git",
      ["rev-list", `${sinceHash}..HEAD`],
      { cwd: repoPath },
    );
    return stdout.split("\n").filter((line) => line.length > 0);
  };

  beforeAll(async () => {
    extractor = new GitExtractor(repoPath, config);

    // Fail fast with a clear message when not run inside a git repository
    const isRepo = await extractor.validateRepository();
    if (!isRepo) {
      throw new Error("Integration tests must be run from a git repository");
    }
  });

  describe("validateRepository", () => {
    it("should detect valid git repository", async () => {
      const result = await extractor.validateRepository();
      expect(result).toBe(true);
    });

    it("should return false for non-existent path", async () => {
      const badExtractor = new GitExtractor("/nonexistent/path", config);
      const result = await badExtractor.validateRepository();
      expect(result).toBe(false);
    });
  });

  describe("getCommits - data integrity", () => {
    it("should extract commits without data corruption", async () => {
      const commits = await extractor.getCommits({ maxCommits: 50 });

      expect(commits.length).toBeGreaterThan(0);
      expect(commits.length).toBeLessThanOrEqual(50);

      // CRITICAL: format fields must not contain numstat patterns.
      // This catches the parsing bug where numstat bleeds into format fields.
      const numstatPattern = /^\d+\s+\d+\s+\S+/;

      for (const commit of commits) {
        // Verify hash format (40 hex characters)
        expect(commit.hash).toMatch(/^[a-f0-9]{40}$/);

        // Verify short hash format (7+ hex characters)
        expect(commit.shortHash).toMatch(/^[a-f0-9]{7,}$/);

        // Verify author is not empty
        expect(commit.author.length).toBeGreaterThan(0);

        // Verify author email format
        expect(commit.authorEmail).toMatch(/.+@.+/);

        // Verify date is valid
        expect(commit.date).toBeInstanceOf(Date);
        expect(commit.date.getTime()).not.toBeNaN();

        // Verify subject is not empty
        expect(commit.subject.length).toBeGreaterThan(0);

        expect(commit.hash).not.toMatch(numstatPattern);
        expect(commit.author).not.toMatch(numstatPattern);
        expect(commit.subject).not.toMatch(numstatPattern);

        // Verify insertions/deletions are non-negative integers
        expect(commit.insertions).toBeGreaterThanOrEqual(0);
        expect(commit.deletions).toBeGreaterThanOrEqual(0);
        expect(Number.isInteger(commit.insertions)).toBe(true);
        expect(Number.isInteger(commit.deletions)).toBe(true);

        // Verify files array
        expect(Array.isArray(commit.files)).toBe(true);
        for (const file of commit.files) {
          expect(typeof file).toBe("string");
          expect(file.length).toBeGreaterThan(0);
        }
      }
    });

    it("should return correct commit count matching git rev-list", async () => {
      // Get expected count from git directly
      const { stdout } = await execFileAsync(
        "git",
        ["rev-list", "--count", "-n", "50", "HEAD"],
        { cwd: repoPath },
      );
      const expectedCount = Math.min(parseInt(stdout.trim(), 10), 50);

      // Get commits via extractor
      const commits = await extractor.getCommits({ maxCommits: 50 });

      // Should match exactly
      expect(commits.length).toBe(expectedCount);
    });

    it("should extract files correctly with stats", async () => {
      // Find a commit with files using git log
      const { stdout: logOutput } = await execFileAsync(
        "git",
        ["log", "--oneline", "--shortstat", "-n", "10", "HEAD"],
        { cwd: repoPath },
      );

      // Match the shortstat summary line itself ("N file(s) changed"), not any
      // commit subject that merely happens to contain the word "file".
      if (/^\s*\d+ files? changed/m.test(logOutput)) {
        const commits = await extractor.getCommits({ maxCommits: 10 });
        const commitsWithFiles = commits.filter((c) => c.files.length > 0);

        // At least some commits should have files
        expect(commitsWithFiles.length).toBeGreaterThan(0);

        // Every reported path must be a non-blank string. Insertions and
        // deletions may legitimately both be 0 (e.g. pure renames).
        for (const commit of commitsWithFiles) {
          for (const file of commit.files) {
            expect(file.trim().length).toBeGreaterThan(0);
          }
        }
      }
    });
  });

  describe("getCommits - range filtering", () => {
    it("should support sinceCommit range filtering", async () => {
      // Get all commits first
      const allCommits = await extractor.getCommits({ maxCommits: 20 });

      // Skipped on very short histories (e.g. shallow clones).
      if (allCommits.length >= 5) {
        // Use the 5th commit as the "since" point
        const sinceHash = allCommits[4].hash;

        // Get commits since that point
        const recentCommits = await extractor.getCommits({
          sinceCommit: sinceHash,
          maxCommits: 20,
        });

        // Ask git which commits lie in `<since>..HEAD`. On linear history this
        // is exactly the 4 newer commits; with merges it can be more, so the
        // expectation is derived instead of hard-coded.
        // NOTE(review): assumes the extractor resolves `sinceCommit` as the
        // range `<sinceCommit>..HEAD` — confirm against the extractor.
        const expected = new Set(await revListSince(sinceHash));

        expect(recentCommits.length).toBe(Math.min(expected.size, 20));
        for (const commit of recentCommits) {
          expect(expected.has(commit.hash)).toBe(true);
        }

        // The "since" commit itself is excluded from the range
        expect(recentCommits.some((c) => c.hash === sinceHash)).toBe(false);
      }
    });
  });

  describe("getCommitDiff", () => {
    it("should return diff for a valid commit", async () => {
      const commits = await extractor.getCommits({ maxCommits: 1 });

      if (commits.length > 0) {
        const diff = await extractor.getCommitDiff(commits[0].hash);

        // Diff should contain commit information
        expect(diff).toContain("commit");
        expect(diff).toContain(commits[0].hash);
      }
    });

    it("should return empty string for invalid commit", async () => {
      const diff = await extractor.getCommitDiff("0000000000000000000000000000000000000000");
      expect(diff).toBe("");
    });
  });

  describe("getLatestCommitHash", () => {
    it("should return the HEAD commit hash", async () => {
      const hash = await extractor.getLatestCommitHash();

      // Verify format
      expect(hash).toMatch(/^[a-f0-9]{40}$/);

      // Verify it matches git rev-parse HEAD
      const { stdout } = await execFileAsync("git", ["rev-parse", "HEAD"], {
        cwd: repoPath,
      });
      expect(hash).toBe(stdout.trim());
    });
  });

  describe("getCommitCount", () => {
    it("should return total commit count", async () => {
      const count = await extractor.getCommitCount();

      // Verify against git rev-list
      const { stdout } = await execFileAsync(
        "git",
        ["rev-list", "--count", "HEAD"],
        { cwd: repoPath },
      );
      expect(count).toBe(parseInt(stdout.trim(), 10));
    });

    it("should return count since specific commit", async () => {
      const commits = await extractor.getCommits({ maxCommits: 10 });

      // Skipped on very short histories (e.g. shallow clones).
      if (commits.length >= 5) {
        const sinceHash = commits[4].hash;
        const count = await extractor.getCommitCount(sinceHash);

        // Number of commits in `<since>..HEAD` according to git: 4 on linear
        // history, possibly more when merge commits are involved.
        const expected = await revListSince(sinceHash);
        expect(count).toBe(expected.length);
      }
    });
  });
});
|