@mhalder/qdrant-mcp-server 1.4.0 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (92) hide show
  1. package/.codecov.yml +16 -0
  2. package/.github/workflows/claude-code-review.yml +6 -5
  3. package/.releaserc.json +8 -1
  4. package/CHANGELOG.md +34 -0
  5. package/README.md +259 -9
  6. package/build/code/chunker/base.d.ts +19 -0
  7. package/build/code/chunker/base.d.ts.map +1 -0
  8. package/build/code/chunker/base.js +5 -0
  9. package/build/code/chunker/base.js.map +1 -0
  10. package/build/code/chunker/character-chunker.d.ts +22 -0
  11. package/build/code/chunker/character-chunker.d.ts.map +1 -0
  12. package/build/code/chunker/character-chunker.js +111 -0
  13. package/build/code/chunker/character-chunker.js.map +1 -0
  14. package/build/code/chunker/tree-sitter-chunker.d.ts +29 -0
  15. package/build/code/chunker/tree-sitter-chunker.d.ts.map +1 -0
  16. package/build/code/chunker/tree-sitter-chunker.js +213 -0
  17. package/build/code/chunker/tree-sitter-chunker.js.map +1 -0
  18. package/build/code/config.d.ts +11 -0
  19. package/build/code/config.d.ts.map +1 -0
  20. package/build/code/config.js +145 -0
  21. package/build/code/config.js.map +1 -0
  22. package/build/code/indexer.d.ts +42 -0
  23. package/build/code/indexer.d.ts.map +1 -0
  24. package/build/code/indexer.js +508 -0
  25. package/build/code/indexer.js.map +1 -0
  26. package/build/code/metadata.d.ts +32 -0
  27. package/build/code/metadata.d.ts.map +1 -0
  28. package/build/code/metadata.js +128 -0
  29. package/build/code/metadata.js.map +1 -0
  30. package/build/code/scanner.d.ts +35 -0
  31. package/build/code/scanner.d.ts.map +1 -0
  32. package/build/code/scanner.js +108 -0
  33. package/build/code/scanner.js.map +1 -0
  34. package/build/code/sync/merkle.d.ts +45 -0
  35. package/build/code/sync/merkle.d.ts.map +1 -0
  36. package/build/code/sync/merkle.js +116 -0
  37. package/build/code/sync/merkle.js.map +1 -0
  38. package/build/code/sync/snapshot.d.ts +41 -0
  39. package/build/code/sync/snapshot.d.ts.map +1 -0
  40. package/build/code/sync/snapshot.js +91 -0
  41. package/build/code/sync/snapshot.js.map +1 -0
  42. package/build/code/sync/synchronizer.d.ts +53 -0
  43. package/build/code/sync/synchronizer.d.ts.map +1 -0
  44. package/build/code/sync/synchronizer.js +132 -0
  45. package/build/code/sync/synchronizer.js.map +1 -0
  46. package/build/code/types.d.ts +98 -0
  47. package/build/code/types.d.ts.map +1 -0
  48. package/build/code/types.js +5 -0
  49. package/build/code/types.js.map +1 -0
  50. package/build/index.js +252 -1
  51. package/build/index.js.map +1 -1
  52. package/build/qdrant/client.d.ts +1 -1
  53. package/build/qdrant/client.d.ts.map +1 -1
  54. package/build/qdrant/client.js +2 -2
  55. package/build/qdrant/client.js.map +1 -1
  56. package/build/qdrant/client.test.js +16 -0
  57. package/build/qdrant/client.test.js.map +1 -1
  58. package/examples/code-search/README.md +271 -0
  59. package/package.json +15 -2
  60. package/src/code/chunker/base.ts +22 -0
  61. package/src/code/chunker/character-chunker.ts +131 -0
  62. package/src/code/chunker/tree-sitter-chunker.ts +250 -0
  63. package/src/code/config.ts +156 -0
  64. package/src/code/indexer.ts +613 -0
  65. package/src/code/metadata.ts +153 -0
  66. package/src/code/scanner.ts +124 -0
  67. package/src/code/sync/merkle.ts +136 -0
  68. package/src/code/sync/snapshot.ts +110 -0
  69. package/src/code/sync/synchronizer.ts +154 -0
  70. package/src/code/types.ts +117 -0
  71. package/src/index.ts +298 -1
  72. package/src/qdrant/client.test.ts +20 -0
  73. package/src/qdrant/client.ts +2 -2
  74. package/tests/code/chunker/character-chunker.test.ts +141 -0
  75. package/tests/code/chunker/tree-sitter-chunker.test.ts +275 -0
  76. package/tests/code/fixtures/sample-py/calculator.py +32 -0
  77. package/tests/code/fixtures/sample-ts/async-operations.ts +120 -0
  78. package/tests/code/fixtures/sample-ts/auth.ts +31 -0
  79. package/tests/code/fixtures/sample-ts/config.ts +52 -0
  80. package/tests/code/fixtures/sample-ts/database.ts +50 -0
  81. package/tests/code/fixtures/sample-ts/index.ts +39 -0
  82. package/tests/code/fixtures/sample-ts/types-advanced.ts +132 -0
  83. package/tests/code/fixtures/sample-ts/utils.ts +105 -0
  84. package/tests/code/fixtures/sample-ts/validator.ts +169 -0
  85. package/tests/code/indexer.test.ts +828 -0
  86. package/tests/code/integration.test.ts +708 -0
  87. package/tests/code/metadata.test.ts +457 -0
  88. package/tests/code/scanner.test.ts +131 -0
  89. package/tests/code/sync/merkle.test.ts +406 -0
  90. package/tests/code/sync/snapshot.test.ts +360 -0
  91. package/tests/code/sync/synchronizer.test.ts +501 -0
  92. package/vitest.config.ts +1 -0
@@ -0,0 +1,117 @@
/**
 * Shared type declarations for the code vectorization module.
 */

/**
 * Settings handed to the code indexer when it is constructed.
 * Fields are grouped by concern: chunking, file discovery, indexing, search.
 */
export interface CodeConfig {
  // --- Chunking ---
  chunkSize: number;
  chunkOverlap: number;
  enableASTChunking: boolean;

  // --- File discovery ---
  supportedExtensions: string[];
  ignorePatterns: string[];
  customExtensions?: string[];
  customIgnorePatterns?: string[];

  // --- Indexing ---
  /** Embeddings per batch. */
  batchSize: number;
  maxChunksPerFile?: number;
  maxTotalChunks?: number;

  // --- Search ---
  defaultSearchLimit: number;
  enableHybridSearch: boolean;
}

/** File-discovery settings; mirrors the discovery fields of {@link CodeConfig}. */
export interface ScannerConfig {
  supportedExtensions: string[];
  ignorePatterns: string[];
  customIgnorePatterns?: string[];
}

/** Settings accepted by a chunker implementation. */
export interface ChunkerConfig {
  chunkSize: number;
  chunkOverlap: number;
  maxChunkSize: number;
}

/** Optional per-call overrides for an indexing run. */
export interface IndexOptions {
  forceReindex?: boolean;
  extensions?: string[];
  ignorePatterns?: string[];
}

/** Summary of a full indexing run. */
export interface IndexStats {
  filesScanned: number;
  filesIndexed: number;
  chunksCreated: number;
  durationMs: number;
  status: "completed" | "partial" | "failed";
  /** Messages collected during the run, if any were recorded. */
  errors?: string[];
}

/** Summary of an incremental re-index run. */
export interface ChangeStats {
  filesAdded: number;
  filesModified: number;
  filesDeleted: number;
  chunksAdded: number;
  chunksDeleted: number;
  durationMs: number;
}

/** A single hit returned from a code search. */
export interface CodeSearchResult {
  content: string;
  filePath: string;
  startLine: number;
  endLine: number;
  language: string;
  score: number;
  fileExtension: string;
}

/** Optional filters and limits for a code search. */
export interface SearchOptions {
  limit?: number;
  useHybrid?: boolean;
  fileTypes?: string[];
  pathPattern?: string;
  scoreThreshold?: number;
}

/** Indexing state of a codebase; only `isIndexed` is always present. */
export interface IndexStatus {
  isIndexed: boolean;
  collectionName?: string;
  filesCount?: number;
  chunksCount?: number;
  lastUpdated?: Date;
  languages?: string[];
}

/** Receives progress notifications while a long-running operation executes. */
export type ProgressCallback = (progress: ProgressUpdate) => void;

/** One progress notification delivered to a {@link ProgressCallback}. */
export interface ProgressUpdate {
  phase: "scanning" | "chunking" | "embedding" | "storing";
  current: number;
  total: number;
  percentage: number;
  message: string;
}

/** A contiguous piece of a source file together with where it came from. */
export interface CodeChunk {
  content: string;
  startLine: number;
  endLine: number;
  metadata: {
    filePath: string;
    language: string;
    chunkIndex: number;
    chunkType?: "function" | "class" | "interface" | "block";
    /** Function/class name if applicable. */
    name?: string;
  };
}

/** Entries grouped by how they changed (presumably file paths; confirm against the synchronizer). */
export interface FileChanges {
  added: string[];
  modified: string[];
  deleted: string[];
}
package/src/index.ts CHANGED
@@ -17,6 +17,16 @@ import {
17
17
  import Bottleneck from "bottleneck";
18
18
  import express from "express";
19
19
  import { z } from "zod";
20
+ import {
21
+ DEFAULT_BATCH_SIZE,
22
+ DEFAULT_CHUNK_OVERLAP,
23
+ DEFAULT_CHUNK_SIZE,
24
+ DEFAULT_CODE_EXTENSIONS,
25
+ DEFAULT_IGNORE_PATTERNS,
26
+ DEFAULT_SEARCH_LIMIT,
27
+ } from "./code/config.js";
28
+ import { CodeIndexer } from "./code/indexer.js";
29
+ import type { CodeConfig } from "./code/types.js";
20
30
  import { EmbeddingProviderFactory } from "./embeddings/factory.js";
21
31
  import { BM25SparseVectorGenerator } from "./embeddings/sparse.js";
22
32
  import { getPrompt, listPrompts, loadPromptsConfig, type PromptsConfig } from "./prompts/index.js";
@@ -29,6 +39,7 @@ const pkg = JSON.parse(readFileSync(join(__dirname, "../package.json"), "utf-8")
29
39
 
30
40
  // Validate environment variables
31
41
  const QDRANT_URL = process.env.QDRANT_URL || "http://localhost:6333";
42
+ const QDRANT_API_KEY = process.env.QDRANT_API_KEY;
32
43
  const EMBEDDING_PROVIDER = (process.env.EMBEDDING_PROVIDER || "ollama").toLowerCase();
33
44
  const TRANSPORT_MODE = (process.env.TRANSPORT_MODE || "stdio").toLowerCase();
34
45
  const HTTP_PORT = parseInt(process.env.HTTP_PORT || "3000", 10);
@@ -141,9 +152,23 @@ async function checkOllamaAvailability() {
141
152
  }
142
153
 
143
154
  // Initialize clients
144
- const qdrant = new QdrantManager(QDRANT_URL);
155
+ const qdrant = new QdrantManager(QDRANT_URL, QDRANT_API_KEY);
145
156
  const embeddings = EmbeddingProviderFactory.createFromEnv();
146
157
 
// Initialize code indexer

/**
 * Reads an integer setting from the environment.
 *
 * Falls back to `fallback` when the variable is unset, empty, not a number,
 * or smaller than `min`. Without this guard a malformed value such as
 * `CODE_CHUNK_SIZE=abc` would put `NaN` into the indexer configuration.
 *
 * @param name - Environment variable to read.
 * @param fallback - Value used when the variable is missing or invalid.
 * @param min - Smallest accepted value (inclusive).
 * @returns The parsed integer, or `fallback`.
 */
function readIntEnv(name: string, fallback: number, min: number): number {
  const raw = process.env[name];
  if (raw === undefined || raw.trim() === "") {
    return fallback;
  }

  const parsed = Number.parseInt(raw, 10);
  if (Number.isNaN(parsed) || parsed < min) {
    // Log to stderr rather than falling back silently.
    console.error(
      `Invalid ${name}="${raw}" (expected an integer >= ${min}); using default ${fallback}`
    );
    return fallback;
  }

  return parsed;
}

const codeConfig: CodeConfig = {
  chunkSize: readIntEnv("CODE_CHUNK_SIZE", DEFAULT_CHUNK_SIZE, 1),
  // Zero overlap is accepted; negative overlap is not.
  chunkOverlap: readIntEnv("CODE_CHUNK_OVERLAP", DEFAULT_CHUNK_OVERLAP, 0),
  // AST chunking is on unless explicitly disabled with CODE_ENABLE_AST=false.
  enableASTChunking: process.env.CODE_ENABLE_AST !== "false",
  supportedExtensions: DEFAULT_CODE_EXTENSIONS,
  ignorePatterns: DEFAULT_IGNORE_PATTERNS,
  batchSize: readIntEnv("CODE_BATCH_SIZE", DEFAULT_BATCH_SIZE, 1),
  defaultSearchLimit: readIntEnv("CODE_SEARCH_LIMIT", DEFAULT_SEARCH_LIMIT, 1),
  // Hybrid search is off unless explicitly enabled with CODE_ENABLE_HYBRID=true.
  enableHybridSearch: process.env.CODE_ENABLE_HYBRID === "true",
};
169
+
170
+ const codeIndexer = new CodeIndexer(qdrant, embeddings, codeConfig);
171
+
147
172
  // Load prompts configuration if file exists
148
173
  let promptsConfig: PromptsConfig | null = null;
149
174
  if (existsSync(PROMPTS_CONFIG_FILE)) {
@@ -366,6 +391,111 @@ function registerHandlers(server: Server) {
366
391
  required: ["collection", "query"],
367
392
  },
368
393
  },
394
+ {
395
+ name: "index_codebase",
396
+ description:
397
+ "Index a codebase for semantic code search. Automatically discovers files, chunks code intelligently using AST-aware parsing, and stores in vector database. Respects .gitignore and other ignore files.",
398
+ inputSchema: {
399
+ type: "object",
400
+ properties: {
401
+ path: {
402
+ type: "string",
403
+ description: "Absolute or relative path to codebase root directory",
404
+ },
405
+ forceReindex: {
406
+ type: "boolean",
407
+ description: "Force full re-index even if already indexed (default: false)",
408
+ },
409
+ extensions: {
410
+ type: "array",
411
+ items: { type: "string" },
412
+ description: "Custom file extensions to index (e.g., ['.proto', '.graphql'])",
413
+ },
414
+ ignorePatterns: {
415
+ type: "array",
416
+ items: { type: "string" },
417
+ description: "Additional patterns to ignore (e.g., ['**/test/**', '**/*.test.ts'])",
418
+ },
419
+ },
420
+ required: ["path"],
421
+ },
422
+ },
423
+ {
424
+ name: "search_code",
425
+ description:
426
+ "Search indexed codebase using natural language queries. Returns semantically relevant code chunks with file paths and line numbers.",
427
+ inputSchema: {
428
+ type: "object",
429
+ properties: {
430
+ path: {
431
+ type: "string",
432
+ description: "Path to codebase (must be indexed first)",
433
+ },
434
+ query: {
435
+ type: "string",
436
+ description: "Natural language search query (e.g., 'authentication logic')",
437
+ },
438
+ limit: {
439
+ type: "number",
440
+ description: "Maximum number of results (default: 5, max: 100)",
441
+ },
442
+ fileTypes: {
443
+ type: "array",
444
+ items: { type: "string" },
445
+ description: "Filter by file extensions (e.g., ['.ts', '.py'])",
446
+ },
447
+ pathPattern: {
448
+ type: "string",
449
+ description: "Filter by path glob pattern (e.g., 'src/services/**')",
450
+ },
451
+ },
452
+ required: ["path", "query"],
453
+ },
454
+ },
455
+ {
456
+ name: "reindex_changes",
457
+ description:
458
+ "Incrementally re-index only changed files. Detects added, modified, and deleted files since last index. Requires previous indexing with index_codebase.",
459
+ inputSchema: {
460
+ type: "object",
461
+ properties: {
462
+ path: {
463
+ type: "string",
464
+ description: "Path to codebase",
465
+ },
466
+ },
467
+ required: ["path"],
468
+ },
469
+ },
470
+ {
471
+ name: "get_index_status",
472
+ description: "Get indexing status and statistics for a codebase.",
473
+ inputSchema: {
474
+ type: "object",
475
+ properties: {
476
+ path: {
477
+ type: "string",
478
+ description: "Path to codebase",
479
+ },
480
+ },
481
+ required: ["path"],
482
+ },
483
+ },
484
+ {
485
+ name: "clear_index",
486
+ description:
487
+ "Delete all indexed data for a codebase. This is irreversible and will remove the entire collection.",
488
+ inputSchema: {
489
+ type: "object",
490
+ properties: {
491
+ path: {
492
+ type: "string",
493
+ description: "Path to codebase",
494
+ },
495
+ },
496
+ required: ["path"],
497
+ },
498
+ },
369
499
  ],
370
500
  };
371
501
  });
@@ -600,6 +730,173 @@ function registerHandlers(server: Server) {
600
730
  };
601
731
  }
602
732
 
case "index_codebase": {
  // Validate tool arguments before touching the indexer.
  const IndexCodebaseSchema = z.object({
    path: z.string(),
    forceReindex: z.boolean().optional(),
    extensions: z.array(z.string()).optional(),
    ignorePatterns: z.array(z.string()).optional(),
  });

  const { path, forceReindex, extensions, ignorePatterns } = IndexCodebaseSchema.parse(args);

  const stats = await codeIndexer.indexCodebase(
    path,
    { forceReindex, extensions, ignorePatterns },
    (progress) => {
      // Progress callback - could send progress updates via SSE in future
      console.error(`[${progress.phase}] ${progress.percentage}% - ${progress.message}`);
    }
  );

  // `errors` is optional on the stats object; joining it unguarded would
  // print the literal text "undefined" in the tool response.
  const errorLines = stats.errors ?? [];

  let statusMessage = `Indexed ${stats.filesIndexed}/${stats.filesScanned} files (${stats.chunksCreated} chunks) in ${(stats.durationMs / 1000).toFixed(1)}s`;

  if (stats.status === "partial") {
    if (errorLines.length > 0) {
      statusMessage += `\n\nWarnings:\n${errorLines.join("\n")}`;
    }
  } else if (stats.status === "failed") {
    statusMessage =
      errorLines.length > 0
        ? `Indexing failed:\n${errorLines.join("\n")}`
        : "Indexing failed: no error details were reported.";
  }

  return {
    content: [
      {
        type: "text",
        text: statusMessage,
      },
    ],
    // Only a fully failed run is surfaced as a tool error; partial runs are not.
    isError: stats.status === "failed",
  };
}
771
+
case "search_code": {
  // The tool description advertises "max: 100" for `limit`; enforce that here
  // so negative, fractional or oversized limits are rejected up front
  // instead of being forwarded to the indexer.
  const SearchCodeSchema = z.object({
    path: z.string(),
    query: z.string(),
    limit: z.number().int().min(1).max(100).optional(),
    fileTypes: z.array(z.string()).optional(),
    pathPattern: z.string().optional(),
  });

  const { path, query, limit, fileTypes, pathPattern } = SearchCodeSchema.parse(args);

  const results = await codeIndexer.searchCode(path, query, {
    limit,
    fileTypes,
    pathPattern,
  });

  if (results.length === 0) {
    return {
      content: [
        {
          type: "text",
          text: `No results found for query: "${query}"`,
        },
      ],
    };
  }

  // Format results with file references (path:start-end) ahead of each snippet.
  const formattedResults = results
    .map(
      (r, idx) =>
        `\n--- Result ${idx + 1} (score: ${r.score.toFixed(3)}) ---\n` +
        `File: ${r.filePath}:${r.startLine}-${r.endLine}\n` +
        `Language: ${r.language}\n\n` +
        `${r.content}\n`
    )
    .join("\n");

  return {
    content: [
      {
        type: "text",
        text: `Found ${results.length} result(s):\n${formattedResults}`,
      },
    ],
  };
}
820
+
case "get_index_status": {
  // Only `path` is accepted; anything else in `args` is ignored by the schema.
  const { path } = z.object({ path: z.string() }).parse(args);

  const status = await codeIndexer.getIndexStatus(path);

  // Indexed codebases get the raw status as pretty-printed JSON; otherwise
  // point the caller at the tool that creates the index.
  const text = status.isIndexed
    ? JSON.stringify(status, null, 2)
    : `Codebase at "${path}" is not indexed. Use index_codebase to index it first.`;

  return {
    content: [{ type: "text", text }],
  };
}
849
+
case "reindex_changes": {
  const ReindexChangesSchema = z.object({
    path: z.string(),
  });

  const { path } = ReindexChangesSchema.parse(args);

  const stats = await codeIndexer.reindexChanges(path, (progress) => {
    console.error(`[${progress.phase}] ${progress.percentage}% - ${progress.message}`);
  });

  // "No changes" is decided from the file counters only.
  const nothingChanged =
    stats.filesAdded === 0 && stats.filesModified === 0 && stats.filesDeleted === 0;

  // Report every counter the stats carry, including deleted chunks, and only
  // build the detailed report when there is something to report.
  const message = nothingChanged
    ? `No changes detected. Codebase is up to date.`
    : [
        `Incremental re-index complete:`,
        `- Files added: ${stats.filesAdded}`,
        `- Files modified: ${stats.filesModified}`,
        `- Files deleted: ${stats.filesDeleted}`,
        `- Chunks added: ${stats.chunksAdded}`,
        `- Chunks deleted: ${stats.chunksDeleted}`,
        `- Duration: ${(stats.durationMs / 1000).toFixed(1)}s`,
      ].join("\n");

  return {
    content: [
      {
        type: "text",
        text: message,
      },
    ],
  };
}
881
+
case "clear_index": {
  // Only `path` is accepted for this tool.
  const { path } = z.object({ path: z.string() }).parse(args);

  // The tool description marks this as irreversible; no confirmation step happens here.
  await codeIndexer.clearIndex(path);

  const text = `Index cleared for codebase at "${path}".`;
  return {
    content: [{ type: "text", text }],
  };
}
899
+
603
900
  default:
604
901
  return {
605
902
  content: [
@@ -27,6 +27,26 @@ describe("QdrantManager", () => {
27
27
  manager = new QdrantManager("http://localhost:6333");
28
28
  });
29
29
 
describe("constructor", () => {
  // The setup shown just above this block already constructs a manager
  // without an API key, so `toHaveBeenCalledWith` could be satisfied by that
  // earlier call. Asserting on the *last* call ties each expectation to the
  // instance created inside the test itself.
  it("should pass apiKey to QdrantClient when provided", () => {
    new QdrantManager("http://localhost:6333", "test-api-key");

    expect(QdrantClient).toHaveBeenLastCalledWith({
      url: "http://localhost:6333",
      apiKey: "test-api-key",
    });
  });

  it("should work without apiKey for unauthenticated instances", () => {
    new QdrantManager("http://localhost:6333");

    expect(QdrantClient).toHaveBeenLastCalledWith({
      url: "http://localhost:6333",
      apiKey: undefined,
    });
  });
});
49
+
30
50
  describe("createCollection", () => {
31
51
  it("should create a collection with default distance metric", async () => {
32
52
  await manager.createCollection("test-collection", 1536);
@@ -23,8 +23,8 @@ export interface SparseVector {
23
23
  export class QdrantManager {
24
24
  private client: QdrantClient;
25
25
 
26
- constructor(url: string = "http://localhost:6333") {
27
- this.client = new QdrantClient({ url });
26
+ constructor(url: string = "http://localhost:6333", apiKey?: string) {
27
+ this.client = new QdrantClient({ url, apiKey });
28
28
  }
29
29
 
30
30
  /**
@@ -0,0 +1,141 @@
import { beforeEach, describe, expect, it } from "vitest";
import { CharacterChunker } from "../../../src/code/chunker/character-chunker.js";
import type { ChunkerConfig } from "../../../src/code/types.js";

/**
 * Unit tests for CharacterChunker.
 *
 * Every test runs against a fresh chunker configured with
 * chunkSize 100, chunkOverlap 20 and maxChunkSize 200.
 */
describe("CharacterChunker", () => {
  let chunker: CharacterChunker;
  let config: ChunkerConfig;

  beforeEach(() => {
    config = {
      chunkSize: 100,
      chunkOverlap: 20,
      maxChunkSize: 200,
    };
    chunker = new CharacterChunker(config);
  });

  describe("chunk", () => {
    it("should chunk small code into single chunk", async () => {
      const code =
        "function hello() {\n console.log('Starting hello function');\n return 'world';\n}";
      const chunks = await chunker.chunk(code, "test.ts", "typescript");

      expect(chunks).toHaveLength(1);
      expect(chunks[0].content).toContain("hello");
      expect(chunks[0].startLine).toBe(1);
      expect(chunks[0].metadata.language).toBe("typescript");
    });

    it("should chunk large code into multiple chunks", async () => {
      const code = Array(20)
        .fill("function testFunction() { console.log('This is a test function'); return true; }\n")
        .join("");
      const chunks = await chunker.chunk(code, "test.js", "javascript");

      expect(chunks.length).toBeGreaterThan(1);
      chunks.forEach((chunk) => {
        expect(chunk.content.length).toBeLessThanOrEqual(config.maxChunkSize);
      });
    });

    it("should preserve line numbers", async () => {
      const code =
        "This is line 1 with enough content to not be filtered\n" +
        "This is line 2 with enough content to not be filtered\n" +
        "This is line 3 with enough content to not be filtered";
      const chunks = await chunker.chunk(code, "test.txt", "text");

      expect(chunks.length).toBeGreaterThan(0);
      expect(chunks[0].startLine).toBe(1);
      expect(chunks[0].endLine).toBeGreaterThan(chunks[0].startLine);
    });

    it("should apply overlap between chunks", async () => {
      const code = Array(20).fill("const x = 1;\n").join("");
      const chunks = await chunker.chunk(code, "test.js", "javascript");

      // The input (260 characters) is longer than maxChunkSize, so it has to be
      // split. Asserting this unconditionally keeps the test from passing
      // vacuously when the chunker returns zero or one chunk.
      // NOTE(review): the overlapping content itself is still not verified;
      // add that once the exact overlap semantics are confirmed.
      expect(code.length).toBeGreaterThan(config.maxChunkSize);
      expect(chunks.length).toBeGreaterThan(1);
    });

    it("should find good break points", async () => {
      const code = `function foo() {
return 1;
}

function bar() {
return 2;
}

function baz() {
return 3;
}`;

      const chunks = await chunker.chunk(code, "test.js", "javascript");
      // Should try to break at function boundaries
      chunks.forEach((chunk) => {
        expect(chunk.content.length).toBeGreaterThan(0);
      });
    });

    it("should handle empty code", async () => {
      const code = "";
      const chunks = await chunker.chunk(code, "test.ts", "typescript");
      expect(chunks).toHaveLength(0);
    });

    it("should handle code with only whitespace", async () => {
      const code = " \n\n\n ";
      const chunks = await chunker.chunk(code, "test.ts", "typescript");
      expect(chunks).toHaveLength(0);
    });

    it("should skip very small chunks", async () => {
      const code = "x";
      const chunks = await chunker.chunk(code, "test.ts", "typescript");
      expect(chunks).toHaveLength(0);
    });
  });

  describe("supportsLanguage", () => {
    it("should support all languages", () => {
      expect(chunker.supportsLanguage("typescript")).toBe(true);
      expect(chunker.supportsLanguage("python")).toBe(true);
      expect(chunker.supportsLanguage("unknown")).toBe(true);
    });
  });

  describe("getStrategyName", () => {
    it("should return correct strategy name", () => {
      expect(chunker.getStrategyName()).toBe("character-based");
    });
  });

  describe("metadata", () => {
    it("should include correct chunk metadata", async () => {
      const code = "function test() {\n console.log('test function');\n return 1;\n}";
      const chunks = await chunker.chunk(code, "/path/to/file.ts", "typescript");

      expect(chunks.length).toBeGreaterThan(0);
      expect(chunks[0].metadata).toEqual({
        filePath: "/path/to/file.ts",
        language: "typescript",
        chunkIndex: 0,
        chunkType: "block",
      });
    });

    it("should increment chunk index", async () => {
      const code = Array(20).fill("function test() {}\n").join("");
      const chunks = await chunker.chunk(code, "test.ts", "typescript");

      // The input (380 characters) is longer than maxChunkSize, so at least two
      // chunks are required; without this the index checks could be skipped.
      expect(code.length).toBeGreaterThan(config.maxChunkSize);
      expect(chunks.length).toBeGreaterThan(1);
      expect(chunks[0].metadata.chunkIndex).toBe(0);
      expect(chunks[1].metadata.chunkIndex).toBe(1);
    });
  });
});