@mhalder/qdrant-mcp-server 2.1.2 → 3.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (101):
  1. package/CHANGELOG.md +21 -0
  2. package/README.md +78 -3
  3. package/build/code/indexer.d.ts +2 -1
  4. package/build/code/indexer.d.ts.map +1 -1
  5. package/build/code/indexer.js +58 -25
  6. package/build/code/indexer.js.map +1 -1
  7. package/build/git/chunker.d.ts +39 -0
  8. package/build/git/chunker.d.ts.map +1 -0
  9. package/build/git/chunker.js +210 -0
  10. package/build/git/chunker.js.map +1 -0
  11. package/build/git/chunker.test.d.ts +2 -0
  12. package/build/git/chunker.test.d.ts.map +1 -0
  13. package/build/git/chunker.test.js +230 -0
  14. package/build/git/chunker.test.js.map +1 -0
  15. package/build/git/config.d.ts +34 -0
  16. package/build/git/config.d.ts.map +1 -0
  17. package/build/git/config.js +163 -0
  18. package/build/git/config.js.map +1 -0
  19. package/build/git/extractor.d.ts +57 -0
  20. package/build/git/extractor.d.ts.map +1 -0
  21. package/build/git/extractor.integration.test.d.ts +6 -0
  22. package/build/git/extractor.integration.test.d.ts.map +1 -0
  23. package/build/git/extractor.integration.test.js +166 -0
  24. package/build/git/extractor.integration.test.js.map +1 -0
  25. package/build/git/extractor.js +231 -0
  26. package/build/git/extractor.js.map +1 -0
  27. package/build/git/extractor.test.d.ts +2 -0
  28. package/build/git/extractor.test.d.ts.map +1 -0
  29. package/build/git/extractor.test.js +267 -0
  30. package/build/git/extractor.test.js.map +1 -0
  31. package/build/git/index.d.ts +10 -0
  32. package/build/git/index.d.ts.map +1 -0
  33. package/build/git/index.js +11 -0
  34. package/build/git/index.js.map +1 -0
  35. package/build/git/indexer.d.ts +50 -0
  36. package/build/git/indexer.d.ts.map +1 -0
  37. package/build/git/indexer.js +588 -0
  38. package/build/git/indexer.js.map +1 -0
  39. package/build/git/indexer.test.d.ts +2 -0
  40. package/build/git/indexer.test.d.ts.map +1 -0
  41. package/build/git/indexer.test.js +867 -0
  42. package/build/git/indexer.test.js.map +1 -0
  43. package/build/git/sync/synchronizer.d.ts +43 -0
  44. package/build/git/sync/synchronizer.d.ts.map +1 -0
  45. package/build/git/sync/synchronizer.js +108 -0
  46. package/build/git/sync/synchronizer.js.map +1 -0
  47. package/build/git/sync/synchronizer.test.d.ts +2 -0
  48. package/build/git/sync/synchronizer.test.d.ts.map +1 -0
  49. package/build/git/sync/synchronizer.test.js +188 -0
  50. package/build/git/sync/synchronizer.test.js.map +1 -0
  51. package/build/git/types.d.ts +159 -0
  52. package/build/git/types.d.ts.map +1 -0
  53. package/build/git/types.js +5 -0
  54. package/build/git/types.js.map +1 -0
  55. package/build/index.js +18 -0
  56. package/build/index.js.map +1 -1
  57. package/build/qdrant/client.d.ts +5 -0
  58. package/build/qdrant/client.d.ts.map +1 -1
  59. package/build/qdrant/client.js +10 -0
  60. package/build/qdrant/client.js.map +1 -1
  61. package/build/qdrant/client.test.js +25 -0
  62. package/build/qdrant/client.test.js.map +1 -1
  63. package/build/tools/git-history.d.ts +10 -0
  64. package/build/tools/git-history.d.ts.map +1 -0
  65. package/build/tools/git-history.js +144 -0
  66. package/build/tools/git-history.js.map +1 -0
  67. package/build/tools/index.d.ts +2 -0
  68. package/build/tools/index.d.ts.map +1 -1
  69. package/build/tools/index.js +4 -0
  70. package/build/tools/index.js.map +1 -1
  71. package/build/tools/schemas.d.ts +24 -0
  72. package/build/tools/schemas.d.ts.map +1 -1
  73. package/build/tools/schemas.js +64 -0
  74. package/build/tools/schemas.js.map +1 -1
  75. package/package.json +1 -1
  76. package/src/code/indexer.ts +73 -24
  77. package/src/git/chunker.test.ts +284 -0
  78. package/src/git/chunker.ts +256 -0
  79. package/src/git/config.ts +173 -0
  80. package/src/git/extractor.integration.test.ts +221 -0
  81. package/src/git/extractor.test.ts +403 -0
  82. package/src/git/extractor.ts +284 -0
  83. package/src/git/index.ts +31 -0
  84. package/src/git/indexer.test.ts +1089 -0
  85. package/src/git/indexer.ts +745 -0
  86. package/src/git/sync/synchronizer.test.ts +250 -0
  87. package/src/git/sync/synchronizer.ts +122 -0
  88. package/src/git/types.ts +192 -0
  89. package/src/index.ts +42 -0
  90. package/src/qdrant/client.test.ts +29 -0
  91. package/src/qdrant/client.ts +14 -0
  92. package/src/tools/git-history.ts +208 -0
  93. package/src/tools/index.ts +7 -0
  94. package/src/tools/schemas.ts +75 -0
  95. package/tests/code/chunker/tree-sitter-chunker.test.ts +87 -5
  96. package/tests/code/indexer.test.ts +121 -0
  97. package/tests/code/integration.test.ts +14 -0
  98. package/tests/code/scanner.test.ts +81 -6
  99. package/tests/code/sync/snapshot.test.ts +55 -4
  100. package/tests/code/sync/synchronizer.test.ts +86 -10
  101. package/vitest.config.ts +2 -0
@@ -0,0 +1,256 @@
1
+ /**
2
+ * CommitChunker - Create embeddable chunks from git commits
3
+ */
4
+
5
+ import { createHash } from "node:crypto";
6
+ import { COMMIT_TYPE_PATTERNS } from "./config.js";
7
+ import type { CommitChunk, CommitType, GitConfig, RawCommit } from "./types.js";
8
+
9
/**
 * Builds embeddable text chunks (plus metadata) from raw git commits.
 *
 * One chunk is produced per commit. When the formatted text is longer than
 * `config.maxChunkSize` characters, a condensed summary is emitted instead.
 */
export class CommitChunker {
  /** Maximum number of file paths listed in a regular chunk. */
  private static readonly MAX_LISTED_FILES = 20;
  /** Maximum number of file paths listed in a truncated chunk. */
  private static readonly MAX_LISTED_FILES_TRUNCATED = 10;
  /** Maximum number of body characters kept in a truncated chunk. */
  private static readonly MAX_TRUNCATED_BODY_LENGTH = 500;
  /** Maximum number of lines kept in the diff preview. */
  private static readonly MAX_DIFF_PREVIEW_LINES = 50;
  /** Trailer appended to every truncated chunk. */
  private static readonly TRUNCATION_MARKER = "[content truncated due to size]";

  constructor(private readonly config: GitConfig) {}

  /**
   * Classify a commit from its subject and body.
   *
   * Each pattern is tested against the raw subject and against the
   * lower-cased `subject + " " + body`. Classification runs in two passes:
   *
   * 1. Start-anchored patterns (source begins with `^`), i.e. the explicit
   *    conventional-commit prefixes such as `fix:` or `feat(scope):`.
   * 2. All patterns, in `COMMIT_TYPE_PATTERNS` order (first match wins).
   *
   * The first pass guarantees that an explicit prefix is never overridden by
   * a keyword heuristic belonging to a type listed earlier (for example
   * `fix: implement retry` is a "fix", not a "feat").
   *
   * @param commit - Commit whose message is inspected.
   * @returns The matched commit type, or `"other"` when nothing matches.
   */
  classifyCommitType(commit: RawCommit): CommitType {
    const message = `${commit.subject} ${commit.body}`.toLowerCase();
    const matches = (pattern: RegExp): boolean =>
      pattern.test(commit.subject) || pattern.test(message);

    // Pass 1: explicit prefixes take precedence over keyword heuristics.
    for (const { type, patterns } of COMMIT_TYPE_PATTERNS) {
      if (
        patterns.some(
          (pattern) => pattern.source.startsWith("^") && matches(pattern),
        )
      ) {
        return type;
      }
    }

    // Pass 2: keyword heuristics, first matching type wins.
    for (const { type, patterns } of COMMIT_TYPE_PATTERNS) {
      if (patterns.some((pattern) => matches(pattern))) {
        return type;
      }
    }

    return "other";
  }

  /**
   * Create embeddable chunks from a commit.
   *
   * Currently always returns exactly one chunk. Content longer than
   * `config.maxChunkSize` characters is replaced by a condensed summary.
   *
   * @param commit - Commit to convert.
   * @param repoPath - Repository path recorded in the chunk metadata.
   * @param diff - Optional diff text used for the diff preview.
   * @returns A single-element array holding the chunk.
   */
  createChunks(
    commit: RawCommit,
    repoPath: string,
    diff?: string,
  ): CommitChunk[] {
    const commitType = this.classifyCommitType(commit);
    const formatted = this.formatChunkContent(commit, commitType, diff);
    const content =
      formatted.length > this.config.maxChunkSize
        ? this.truncateContent(formatted, commit, commitType)
        : formatted;

    return [
      {
        content,
        metadata: this.createMetadata(commit, commitType, repoPath),
      },
    ];
  }

  /**
   * Generate a deterministic chunk ID.
   *
   * The ID is `gitcommit_` followed by the first 16 hex characters of the
   * SHA-256 of `"<commitHash>:<repoPath>"`, so the same commit in the same
   * repository path always maps to the same ID.
   *
   * @param chunk - Chunk whose metadata supplies the hash and repo path.
   * @returns The chunk ID.
   */
  generateChunkId(chunk: CommitChunk): string {
    const data = `${chunk.metadata.commitHash}:${chunk.metadata.repoPath}`;
    const hash = createHash("sha256").update(data).digest("hex");
    return `gitcommit_${hash.substring(0, 16)}`;
  }

  /**
   * Format the full chunk text: header, subject, optional description,
   * optional file list, change stats and optional diff preview.
   */
  private formatChunkContent(
    commit: RawCommit,
    commitType: CommitType,
    diff?: string,
  ): string {
    const lines = this.formatHeader(commit, commitType);

    // Body (description) if present
    const body = commit.body.trim();
    if (body) {
      lines.push("Description:", body, "");
    }

    // Files changed
    if (this.config.includeFileList && commit.files.length > 0) {
      lines.push(
        ...this.formatFileList(commit.files, CommitChunker.MAX_LISTED_FILES),
        "",
      );
    }

    // Change stats
    lines.push(`Changes: +${commit.insertions} -${commit.deletions}`);

    // Diff preview if included
    if (this.config.includeDiff && diff) {
      lines.push("", "Diff preview:", this.extractDiffPreview(diff));
    }

    return lines.join("\n");
  }

  /**
   * Header lines shared by the full and the truncated format; ends with the
   * blank line that follows the subject.
   */
  private formatHeader(commit: RawCommit, commitType: CommitType): string[] {
    return [
      `Commit: ${commit.shortHash}`,
      `Type: ${commitType}`,
      `Author: ${commit.author}`,
      // Date only (YYYY-MM-DD, UTC) - the time of day is dropped.
      `Date: ${commit.date.toISOString().split("T")[0]}`,
      "",
      `Subject: ${commit.subject}`,
      "",
    ];
  }

  /**
   * "Files changed" section listing at most `limit` paths, followed by a
   * "... and N more files" line when paths were omitted.
   */
  private formatFileList(files: readonly string[], limit: number): string[] {
    const lines = [
      `Files changed (${files.length}):`,
      ...files.slice(0, limit).map((file) => ` - ${file}`),
    ];
    if (files.length > limit) {
      lines.push(` ... and ${files.length - limit} more files`);
    }
    return lines;
  }

  /**
   * Extract a readable preview from a diff.
   *
   * Everything before the first `diff --git` header is discarded. The
   * preview keeps hunk headers (`@@ ...`) and added/removed lines. Every hunk
   * header except the very first emitted line is preceded by a
   * `--- <file> ---` label. The preview is capped at
   * `MAX_DIFF_PREVIEW_LINES` lines.
   *
   * @returns The preview, or `""` when the text has no `diff --git` header.
   */
  private extractDiffPreview(diff: string): string {
    const diffStart = diff.indexOf("diff --git");
    if (diffStart === -1) return "";

    const changeLines: string[] = [];
    let currentFile = "";
    // `---`/`+++` are file headers only before the first hunk of a file.
    // Inside a hunk they are real changes (e.g. a removed line "-- note").
    let inHunk = false;

    for (const line of diff.substring(diffStart).split("\n")) {
      if (line.startsWith("diff --")) {
        // A new per-file header begins (also covers combined-diff headers).
        inHunk = false;
        const match = /^diff --git a\/.+ b\/(.+)/.exec(line);
        currentFile = match?.[1] ?? currentFile;
      } else if (line.startsWith("@@")) {
        inHunk = true;
        if (currentFile && changeLines.length > 0) {
          changeLines.push(`--- ${currentFile} ---`);
        }
        changeLines.push(line);
      } else if (line.startsWith("+") || line.startsWith("-")) {
        const isFileHeader =
          !inHunk && (line.startsWith("+++") || line.startsWith("---"));
        if (!isFileHeader) {
          changeLines.push(line);
        }
      }
    }

    const maxLines = CommitChunker.MAX_DIFF_PREVIEW_LINES;
    if (changeLines.length > maxLines) {
      return (
        changeLines.slice(0, maxLines).join("\n") +
        `\n... (${changeLines.length - maxLines} more lines)`
      );
    }

    return changeLines.join("\n");
  }

  /**
   * Build a condensed summary for a commit whose full text is too large.
   *
   * Keeps the header and subject, at most `MAX_TRUNCATED_BODY_LENGTH` body
   * characters, a shortened file list (only when `config.includeFileList` is
   * set, consistent with the full format) and the change stats, followed by
   * the truncation marker. If the summary still exceeds
   * `config.maxChunkSize`, its tail is cut so the result fits the limit and
   * still ends with the marker.
   *
   * @param content - The oversized full text. Not used to build the summary;
   *   the parameter is kept so the method signature stays stable.
   */
  private truncateContent(
    content: string,
    commit: RawCommit,
    commitType: CommitType,
  ): string {
    const marker = CommitChunker.TRUNCATION_MARKER;
    const lines = this.formatHeader(commit, commitType);

    // Add truncated body if present
    const body = commit.body.trim();
    if (body) {
      const maxBody = CommitChunker.MAX_TRUNCATED_BODY_LENGTH;
      lines.push(
        "Description:",
        body.length > maxBody ? `${body.substring(0, maxBody)}...` : body,
        "",
      );
    }

    // Add file summary (honours the same switch as the full format)
    if (this.config.includeFileList && commit.files.length > 0) {
      lines.push(
        ...this.formatFileList(
          commit.files,
          CommitChunker.MAX_LISTED_FILES_TRUNCATED,
        ),
        "",
      );
    }

    lines.push(
      `Changes: +${commit.insertions} -${commit.deletions}`,
      "",
      marker,
    );

    const summary = lines.join("\n");
    const limit = Math.max(0, this.config.maxChunkSize);
    if (summary.length <= limit) return summary;

    // Even the summary is too long (huge subject, long paths, tiny limit):
    // hard-cap it while keeping the marker visible at the end.
    if (limit <= marker.length) return marker.substring(0, limit);
    const head = summary.substring(0, limit - marker.length - 1).trimEnd();
    return head ? `${head}\n${marker}` : marker;
  }

  /**
   * Create the metadata object stored alongside a chunk.
   *
   * The date is serialised as a full ISO-8601 string; `files` is passed
   * through by reference (not copied).
   */
  private createMetadata(
    commit: RawCommit,
    commitType: CommitType,
    repoPath: string,
  ): CommitChunk["metadata"] {
    return {
      commitHash: commit.hash,
      shortHash: commit.shortHash,
      author: commit.author,
      authorEmail: commit.authorEmail,
      date: commit.date.toISOString(),
      subject: commit.subject,
      commitType,
      files: commit.files,
      insertions: commit.insertions,
      deletions: commit.deletions,
      repoPath,
    };
  }
}
@@ -0,0 +1,173 @@
1
+ /**
2
+ * Default configuration and constants for git history indexing
3
+ */
4
+
5
+ import type { CommitType, GitConfig } from "./types.js";
6
+
7
/**
 * Baseline settings for git history indexing.
 *
 * Callers can spread this object and override individual fields.
 */
export const DEFAULT_GIT_CONFIG: GitConfig = {
  maxCommits: 5000,
  includeFileList: true,
  includeDiff: true,
  // 5KB max diff per commit
  maxDiffSize: 5 * 1000,
  // 5 minutes timeout for git commands (milliseconds)
  gitTimeout: 5 * 60 * 1000,
  maxChunkSize: 3000,
  batchSize: 100,
  // Retry failed batches up to 3 times
  batchRetryAttempts: 3,
  defaultSearchLimit: 10,
  enableHybridSearch: true,
};
22
+
23
/**
 * Patterns for classifying commit types based on conventional commits.
 *
 * Order matters - entries are scanned top to bottom and the first entry with
 * a matching pattern wins. Within each entry, the start-anchored patterns
 * (`^type(scope)?:` / `^type!`) recognise an explicit conventional-commit
 * prefix; the remaining patterns are keyword heuristics.
 *
 * The "revert" type appears twice on purpose:
 * - first, with start-anchored patterns only, so that an explicit `revert:`
 *   prefix and git's auto-generated `Revert "<original subject>"` subject are
 *   recognised before the keywords of the *reverted* commit (for example
 *   "fix ... bug") can claim the commit for another type;
 * - last, with the loose keyword heuristic, so a mere mention of "revert"
 *   does not override any more specific type.
 */
export const COMMIT_TYPE_PATTERNS: Array<{
  type: CommitType;
  patterns: RegExp[];
}> = [
  {
    type: "revert",
    patterns: [
      /^revert(\(.+\))?[!:]/, // revert: or revert(scope):
      /^revert\s+"/i, // git's default subject: Revert "<subject>"
    ],
  },
  {
    type: "feat",
    patterns: [
      /^feat(\(.+\))?[!:]/, // feat: or feat(scope):
      /^feature(\(.+\))?[!:]/,
      /\badd(ed|s|ing)?\b.*\b(feature|functionality|support)/i,
      /\bimplement(ed|s|ing)?\b/i,
      /\bnew\b.*\b(feature|functionality)/i,
    ],
  },
  {
    type: "fix",
    patterns: [
      /^fix(\(.+\))?[!:]/,
      /^bugfix(\(.+\))?[!:]/,
      /^hotfix(\(.+\))?[!:]/,
      /\bfix(ed|es|ing)?\b.*\b(bug|issue|problem|error)/i,
      /\bresolve[ds]?\b.*\b(issue|bug|problem)/i,
      /\bcorrect(ed|s|ing)?\b/i,
    ],
  },
  {
    type: "refactor",
    patterns: [
      /^refactor(\(.+\))?[!:]/,
      /\brefactor(ed|s|ing)?\b/i,
      /\brestructur(ed|es|ing)?\b/i,
      /\breorganiz(ed|es|ing)?\b/i,
      /\bclean(ed|s|ing)?\s*up\b/i,
    ],
  },
  {
    type: "docs",
    patterns: [
      /^docs?(\(.+\))?[!:]/,
      /\bdocument(ed|s|ing|ation)?\b/i,
      /\breadme\b/i,
      /\bchangelog\b/i,
      /\bcomments?\b/i,
      /\bjsdoc\b/i,
      /\btypedoc\b/i,
    ],
  },
  {
    type: "test",
    patterns: [
      /^tests?(\(.+\))?[!:]/, // test: or tests: (with optional scope)
      /\btest(ed|s|ing)?\b/i,
      /\bspec(s)?\b/i,
      /\bcoverage\b/i,
      /\bunit\s*test/i,
      /\bintegration\s*test/i,
      /\be2e\b/i,
    ],
  },
  {
    type: "chore",
    patterns: [
      /^chore(\(.+\))?[!:]/,
      /\bchore\b/i,
      /\bmaintenance\b/i,
      /\bdependenc(y|ies)\b/i,
      /\bbump(ed|s|ing)?\b.*\bversion/i,
      /\bupgrade[ds]?\b/i,
      /\bupdate[ds]?\b.*\b(dep|package|lock)/i,
    ],
  },
  {
    type: "style",
    patterns: [
      /^style(\(.+\))?[!:]/,
      /\bformat(ted|s|ting)?\b/i,
      /\blint(ed|s|ing)?\b/i,
      /\bprettier\b/i,
      /\beslint\b/i,
      /\bwhitespace\b/i,
      /\bindentation\b/i,
    ],
  },
  {
    type: "perf",
    patterns: [
      /^perf(\(.+\))?[!:]/,
      /^performance(\(.+\))?[!:]/,
      /\bperformance\b/i,
      /\boptimiz(ed|es|ing|ation)?\b/i,
      /\bspeed\s*up\b/i,
      /\bfaster\b/i,
      /\bcach(e|ed|ing)\b/i,
    ],
  },
  {
    type: "build",
    patterns: [
      /^build(\(.+\))?[!:]/,
      /\bbuild\b/i,
      /\bwebpack\b/i,
      /\brollup\b/i,
      /\bvite\b/i,
      /\bbundl(e|ed|er|ing)\b/i,
      /\bcompil(e|ed|er|ing|ation)\b/i,
    ],
  },
  {
    type: "ci",
    patterns: [
      /^ci(\(.+\))?[!:]/,
      /\bci\b/i,
      /\bgithub\s*actions?\b/i,
      /\bworkflow\b/i,
      /\bpipeline\b/i,
      /\btravis\b/i,
      /\bcircle\s*ci\b/i,
      /\bjenkins\b/i,
    ],
  },
  {
    // Keyword fallback only; the anchored forms are handled by the first entry.
    type: "revert",
    patterns: [/\brevert(ed|s|ing)?\b/i],
  },
];
153
+
154
/**
 * Pretty-format string passed to `git log` to obtain structured commit data.
 *
 * Fields, separated by `|`, in order:
 * full hash (%H), abbreviated hash (%h), author name (%an),
 * author email (%ae), author date in strict ISO 8601 (%aI),
 * subject (%s), body (%b).
 *
 * NOTE(review): names, subjects and bodies may themselves contain `|` -
 * confirm the parser only splits on the leading delimiters.
 */
export const GIT_LOG_FORMAT = "%H|%h|%an|%ae|%aI|%s|%b";

/**
 * Marker placed in git log output to separate one commit from the next.
 */
export const GIT_LOG_COMMIT_DELIMITER = "---COMMIT_DELIMITER---";

/**
 * Maximum buffer size for git operations, in bytes (50MB).
 */
export const GIT_MAX_BUFFER = 50 * 1024 ** 2;

/**
 * Reserved ID under which indexing metadata is stored in the collection.
 */
export const GIT_INDEXING_METADATA_ID = "__git_indexing_metadata__";
@@ -0,0 +1,221 @@
1
+ /**
2
+ * Integration tests for GitExtractor
3
+ * These tests run against real git repositories (not mocked)
4
+ */
5
+
6
+ import { describe, it, expect, beforeAll } from "vitest";
7
+ import { GitExtractor } from "./extractor.js";
8
+ import { DEFAULT_GIT_CONFIG } from "./config.js";
9
+ import type { GitConfig } from "./types.js";
10
+ import { execFile } from "node:child_process";
11
+ import { promisify } from "node:util";
12
+
13
const execFileAsync = promisify(execFile);

describe("GitExtractor Integration Tests", () => {
  let extractor: GitExtractor;
  const config: GitConfig = { ...DEFAULT_GIT_CONFIG, maxCommits: 100 };

  // Use the current repository for integration tests
  const repoPath = process.cwd();

  /** Run git inside the repository under test and return its trimmed stdout. */
  const git = async (...args: string[]): Promise<string> => {
    const { stdout } = await execFileAsync("git", args, { cwd: repoPath });
    return stdout.trim();
  };

  beforeAll(async () => {
    extractor = new GitExtractor(repoPath, config);

    // Verify we're in a git repository
    const isRepo = await extractor.validateRepository();
    if (!isRepo) {
      throw new Error("Integration tests must be run from a git repository");
    }
  });

  describe("validateRepository", () => {
    it("should detect valid git repository", async () => {
      const result = await extractor.validateRepository();
      expect(result).toBe(true);
    });

    it("should return false for non-existent path", async () => {
      const badExtractor = new GitExtractor("/nonexistent/path", config);
      const result = await badExtractor.validateRepository();
      expect(result).toBe(false);
    });
  });

  describe("getCommits - data integrity", () => {
    it("should extract commits without data corruption", async () => {
      const commits = await extractor.getCommits({ maxCommits: 50 });

      expect(commits.length).toBeGreaterThan(0);
      expect(commits.length).toBeLessThanOrEqual(50);

      // CRITICAL: format fields must never look like numstat rows
      // ("<insertions> <deletions> <path>"). This catches the parsing bug
      // where numstat output bleeds into the formatted fields.
      const numstatPattern = /^\d+\s+\d+\s+\S+/;

      for (const commit of commits) {
        // Verify hash format (40 hex characters)
        expect(commit.hash).toMatch(/^[a-f0-9]{40}$/);

        // Verify short hash format (7+ hex characters)
        expect(commit.shortHash).toMatch(/^[a-f0-9]{7,}$/);

        // Verify author is not empty
        expect(commit.author.length).toBeGreaterThan(0);

        // Verify author email format
        expect(commit.authorEmail).toMatch(/.+@.+/);

        // Verify date is valid
        expect(commit.date).toBeInstanceOf(Date);
        expect(commit.date.getTime()).not.toBeNaN();

        // Verify subject is not empty
        expect(commit.subject.length).toBeGreaterThan(0);

        expect(commit.hash).not.toMatch(numstatPattern);
        expect(commit.author).not.toMatch(numstatPattern);
        expect(commit.subject).not.toMatch(numstatPattern);

        // Verify insertions/deletions are non-negative integers
        expect(commit.insertions).toBeGreaterThanOrEqual(0);
        expect(commit.deletions).toBeGreaterThanOrEqual(0);
        expect(Number.isInteger(commit.insertions)).toBe(true);
        expect(Number.isInteger(commit.deletions)).toBe(true);

        // Verify files array
        expect(Array.isArray(commit.files)).toBe(true);
        for (const file of commit.files) {
          expect(typeof file).toBe("string");
          expect(file.length).toBeGreaterThan(0);
        }
      }
    });

    it("should return correct commit count matching git rev-list", async () => {
      // Get expected count from git directly
      const revListCount = await git("rev-list", "--count", "-n", "50", "HEAD");
      const expectedCount = Math.min(parseInt(revListCount, 10), 50);

      // Get commits via extractor
      const commits = await extractor.getCommits({ maxCommits: 50 });

      // Should match exactly
      expect(commits.length).toBe(expectedCount);
    });

    it("should extract files correctly with stats", async () => {
      // Find a commit with files using git log
      const logOutput = await git(
        "log",
        "--oneline",
        "--shortstat",
        "-n",
        "10",
        "HEAD",
      );

      // If there are commits with files changed, verify our extractor gets them
      if (logOutput.includes("file")) {
        const commits = await extractor.getCommits({ maxCommits: 10 });
        const commitsWithFiles = commits.filter((c) => c.files.length > 0);

        // At least some commits should have files
        expect(commitsWithFiles.length).toBeGreaterThan(0);

        // Insertions/deletions may legitimately be zero (e.g. pure renames),
        // so only the file list itself is checked here.
        for (const commit of commitsWithFiles) {
          expect(commit.files.length).toBeGreaterThan(0);
        }
      }
    });
  });

  describe("getCommits - range filtering", () => {
    it("should support sinceCommit range filtering", async () => {
      const maxCommits = 20;

      // Get all commits first
      const allCommits = await extractor.getCommits({ maxCommits });

      if (allCommits.length >= 5) {
        // Use the 5th commit as the "since" point
        const sinceHash = allCommits[4].hash;

        // Get commits since that point
        const recentCommits = await extractor.getCommits({
          sinceCommit: sinceHash,
          maxCommits,
        });

        // Ask git which commits lie in the range instead of assuming a
        // linear history: with merge commits, "since..HEAD" can contain more
        // than the four commits listed before the since point.
        const rangeHashes = new Set(
          (await git("rev-list", `${sinceHash}..HEAD`))
            .split("\n")
            .filter(Boolean),
        );

        expect(rangeHashes.size).toBeGreaterThanOrEqual(4);
        expect(recentCommits.length).toBe(
          Math.min(rangeHashes.size, maxCommits),
        );

        // The range is exclusive of the since point and contains only
        // commits git reports for it
        for (const commit of recentCommits) {
          expect(commit.hash).not.toBe(sinceHash);
          expect(rangeHashes.has(commit.hash)).toBe(true);
        }
      }
    });
  });

  describe("getCommitDiff", () => {
    it("should return diff for a valid commit", async () => {
      const commits = await extractor.getCommits({ maxCommits: 1 });

      if (commits.length > 0) {
        const diff = await extractor.getCommitDiff(commits[0].hash);

        // Diff should contain commit information
        expect(diff).toContain("commit");
        expect(diff).toContain(commits[0].hash);
      }
    });

    it("should return empty string for invalid commit", async () => {
      const diff = await extractor.getCommitDiff(
        "0000000000000000000000000000000000000000",
      );
      expect(diff).toBe("");
    });
  });

  describe("getLatestCommitHash", () => {
    it("should return the HEAD commit hash", async () => {
      const hash = await extractor.getLatestCommitHash();

      // Verify format
      expect(hash).toMatch(/^[a-f0-9]{40}$/);

      // Verify it matches git rev-parse HEAD
      expect(hash).toBe(await git("rev-parse", "HEAD"));
    });
  });

  describe("getCommitCount", () => {
    it("should return total commit count", async () => {
      const count = await extractor.getCommitCount();

      // Verify against git rev-list
      const expected = await git("rev-list", "--count", "HEAD");
      expect(count).toBe(parseInt(expected, 10));
    });

    it("should return count since specific commit", async () => {
      const commits = await extractor.getCommits({ maxCommits: 10 });

      if (commits.length >= 5) {
        const sinceHash = commits[4].hash;
        const count = await extractor.getCommitCount(sinceHash);

        // Number of commits between sinceHash (exclusive) and HEAD as git
        // sees it: 4 on a linear history, possibly more across merges
        const expected = await git(
          "rev-list",
          "--count",
          `${sinceHash}..HEAD`,
        );
        expect(count).toBe(parseInt(expected, 10));
      }
    });
  });
});