codevault 1.7.3 → 1.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (215) hide show
  1. package/README.md +18 -5
  2. package/dist/chunking/semantic-chunker.d.ts.map +1 -1
  3. package/dist/chunking/semantic-chunker.js +20 -7
  4. package/dist/chunking/semantic-chunker.js.map +1 -1
  5. package/dist/cli/commands/index-cmd.d.ts +3 -0
  6. package/dist/cli/commands/index-cmd.d.ts.map +1 -0
  7. package/dist/cli/commands/index-cmd.js +118 -0
  8. package/dist/cli/commands/index-cmd.js.map +1 -0
  9. package/dist/cli/commands/info-cmd.d.ts +3 -0
  10. package/dist/cli/commands/info-cmd.d.ts.map +1 -0
  11. package/dist/cli/commands/info-cmd.js +47 -0
  12. package/dist/cli/commands/info-cmd.js.map +1 -0
  13. package/dist/cli/commands/mcp-cmd.d.ts +3 -0
  14. package/dist/cli/commands/mcp-cmd.d.ts.map +1 -0
  15. package/dist/cli/commands/mcp-cmd.js +33 -0
  16. package/dist/cli/commands/mcp-cmd.js.map +1 -0
  17. package/dist/cli/commands/search-cmd.d.ts +3 -0
  18. package/dist/cli/commands/search-cmd.d.ts.map +1 -0
  19. package/dist/cli/commands/search-cmd.js +52 -0
  20. package/dist/cli/commands/search-cmd.js.map +1 -0
  21. package/dist/cli/commands/search-with-code-cmd.d.ts +3 -0
  22. package/dist/cli/commands/search-with-code-cmd.d.ts.map +1 -0
  23. package/dist/cli/commands/search-with-code-cmd.js +74 -0
  24. package/dist/cli/commands/search-with-code-cmd.js.map +1 -0
  25. package/dist/cli/commands/update-cmd.d.ts +3 -0
  26. package/dist/cli/commands/update-cmd.d.ts.map +1 -0
  27. package/dist/cli/commands/update-cmd.js +24 -0
  28. package/dist/cli/commands/update-cmd.js.map +1 -0
  29. package/dist/cli/commands/watch-cmd.d.ts +3 -0
  30. package/dist/cli/commands/watch-cmd.d.ts.map +1 -0
  31. package/dist/cli/commands/watch-cmd.js +47 -0
  32. package/dist/cli/commands/watch-cmd.js.map +1 -0
  33. package/dist/cli/index.d.ts +3 -0
  34. package/dist/cli/index.d.ts.map +1 -0
  35. package/dist/cli/index.js +47 -0
  36. package/dist/cli/index.js.map +1 -0
  37. package/dist/cli.d.ts +1 -1
  38. package/dist/cli.d.ts.map +1 -1
  39. package/dist/cli.js +4 -398
  40. package/dist/cli.js.map +1 -1
  41. package/dist/codemap/io.d.ts +1 -0
  42. package/dist/codemap/io.d.ts.map +1 -1
  43. package/dist/codemap/io.js.map +1 -1
  44. package/dist/config/constants.d.ts +11 -0
  45. package/dist/config/constants.d.ts.map +1 -1
  46. package/dist/config/constants.js +9 -0
  47. package/dist/config/constants.js.map +1 -1
  48. package/dist/config/loader.d.ts.map +1 -1
  49. package/dist/config/loader.js +9 -2
  50. package/dist/config/loader.js.map +1 -1
  51. package/dist/core/IndexerEngine.d.ts +9 -17
  52. package/dist/core/IndexerEngine.d.ts.map +1 -1
  53. package/dist/core/IndexerEngine.js +72 -347
  54. package/dist/core/IndexerEngine.js.map +1 -1
  55. package/dist/core/SearchService.d.ts +13 -13
  56. package/dist/core/SearchService.d.ts.map +1 -1
  57. package/dist/core/SearchService.js +133 -325
  58. package/dist/core/SearchService.js.map +1 -1
  59. package/dist/core/batch-indexer.d.ts +5 -0
  60. package/dist/core/batch-indexer.d.ts.map +1 -1
  61. package/dist/core/batch-indexer.js +22 -23
  62. package/dist/core/batch-indexer.js.map +1 -1
  63. package/dist/core/indexer.d.ts +13 -0
  64. package/dist/core/indexer.d.ts.map +1 -1
  65. package/dist/core/indexer.js +13 -0
  66. package/dist/core/indexer.js.map +1 -1
  67. package/dist/core/indexing/FileProcessor.d.ts +42 -0
  68. package/dist/core/indexing/FileProcessor.d.ts.map +1 -0
  69. package/dist/core/indexing/FileProcessor.js +245 -0
  70. package/dist/core/indexing/FileProcessor.js.map +1 -0
  71. package/dist/core/indexing/IndexContext.d.ts +42 -0
  72. package/dist/core/indexing/IndexContext.d.ts.map +1 -0
  73. package/dist/core/indexing/IndexContext.js +133 -0
  74. package/dist/core/indexing/IndexContext.js.map +1 -0
  75. package/dist/core/indexing/IndexFinalizationStage.d.ts +39 -0
  76. package/dist/core/indexing/IndexFinalizationStage.d.ts.map +1 -0
  77. package/dist/core/indexing/IndexFinalizationStage.js +114 -0
  78. package/dist/core/indexing/IndexFinalizationStage.js.map +1 -0
  79. package/dist/core/indexing/IndexState.d.ts +50 -0
  80. package/dist/core/indexing/IndexState.d.ts.map +1 -0
  81. package/dist/core/indexing/IndexState.js +66 -0
  82. package/dist/core/indexing/IndexState.js.map +1 -0
  83. package/dist/core/indexing/chunk-pipeline.d.ts +39 -3
  84. package/dist/core/indexing/chunk-pipeline.d.ts.map +1 -1
  85. package/dist/core/indexing/chunk-pipeline.js +59 -24
  86. package/dist/core/indexing/chunk-pipeline.js.map +1 -1
  87. package/dist/core/search/CandidateRetriever.d.ts +51 -0
  88. package/dist/core/search/CandidateRetriever.d.ts.map +1 -0
  89. package/dist/core/search/CandidateRetriever.js +119 -0
  90. package/dist/core/search/CandidateRetriever.js.map +1 -0
  91. package/dist/core/search/HybridFusion.d.ts +89 -0
  92. package/dist/core/search/HybridFusion.d.ts.map +1 -0
  93. package/dist/core/search/HybridFusion.js +263 -0
  94. package/dist/core/search/HybridFusion.js.map +1 -0
  95. package/dist/core/search/ResultMapper.d.ts +31 -0
  96. package/dist/core/search/ResultMapper.d.ts.map +1 -0
  97. package/dist/core/search/ResultMapper.js +131 -0
  98. package/dist/core/search/ResultMapper.js.map +1 -0
  99. package/dist/core/search/SearchContextManager.d.ts +69 -0
  100. package/dist/core/search/SearchContextManager.d.ts.map +1 -0
  101. package/dist/core/search/SearchContextManager.js +139 -0
  102. package/dist/core/search/SearchContextManager.js.map +1 -0
  103. package/dist/core/search.d.ts +31 -0
  104. package/dist/core/search.d.ts.map +1 -1
  105. package/dist/core/search.js +33 -0
  106. package/dist/core/search.js.map +1 -1
  107. package/dist/core/types.d.ts +13 -0
  108. package/dist/core/types.d.ts.map +1 -1
  109. package/dist/database/db.d.ts +36 -18
  110. package/dist/database/db.d.ts.map +1 -1
  111. package/dist/database/db.js +151 -24
  112. package/dist/database/db.js.map +1 -1
  113. package/dist/indexer/ChangeQueue.d.ts +67 -0
  114. package/dist/indexer/ChangeQueue.d.ts.map +1 -0
  115. package/dist/indexer/ChangeQueue.js +182 -0
  116. package/dist/indexer/ChangeQueue.js.map +1 -0
  117. package/dist/indexer/ProviderManager.d.ts +33 -0
  118. package/dist/indexer/ProviderManager.d.ts.map +1 -0
  119. package/dist/indexer/ProviderManager.js +75 -0
  120. package/dist/indexer/ProviderManager.js.map +1 -0
  121. package/dist/indexer/WatchService.d.ts +68 -0
  122. package/dist/indexer/WatchService.d.ts.map +1 -0
  123. package/dist/indexer/WatchService.js +155 -0
  124. package/dist/indexer/WatchService.js.map +1 -0
  125. package/dist/indexer/merkle.d.ts +5 -0
  126. package/dist/indexer/merkle.d.ts.map +1 -1
  127. package/dist/indexer/merkle.js +36 -12
  128. package/dist/indexer/merkle.js.map +1 -1
  129. package/dist/indexer/update.d.ts +14 -2
  130. package/dist/indexer/update.d.ts.map +1 -1
  131. package/dist/indexer/update.js +11 -0
  132. package/dist/indexer/update.js.map +1 -1
  133. package/dist/indexer/watch.d.ts +15 -20
  134. package/dist/indexer/watch.d.ts.map +1 -1
  135. package/dist/indexer/watch.js +14 -223
  136. package/dist/indexer/watch.js.map +1 -1
  137. package/dist/mcp/handlers/project.d.ts.map +1 -1
  138. package/dist/mcp/handlers/project.js +17 -1
  139. package/dist/mcp/handlers/project.js.map +1 -1
  140. package/dist/mcp-server.d.ts +6 -0
  141. package/dist/mcp-server.d.ts.map +1 -1
  142. package/dist/mcp-server.js +63 -2
  143. package/dist/mcp-server.js.map +1 -1
  144. package/dist/providers/base.d.ts +3 -0
  145. package/dist/providers/base.d.ts.map +1 -1
  146. package/dist/providers/base.js.map +1 -1
  147. package/dist/providers/index.d.ts +7 -0
  148. package/dist/providers/index.d.ts.map +1 -1
  149. package/dist/providers/index.js +15 -0
  150. package/dist/providers/index.js.map +1 -1
  151. package/dist/providers/mock.d.ts +16 -0
  152. package/dist/providers/mock.d.ts.map +1 -0
  153. package/dist/providers/mock.js +46 -0
  154. package/dist/providers/mock.js.map +1 -0
  155. package/dist/providers/openai.d.ts.map +1 -1
  156. package/dist/providers/openai.js +4 -8
  157. package/dist/providers/openai.js.map +1 -1
  158. package/dist/ranking/api-reranker.d.ts.map +1 -1
  159. package/dist/ranking/api-reranker.js +9 -5
  160. package/dist/ranking/api-reranker.js.map +1 -1
  161. package/dist/search/bm25.d.ts.map +1 -1
  162. package/dist/search/bm25.js +21 -1
  163. package/dist/search/bm25.js.map +1 -1
  164. package/dist/search/scope.d.ts +1 -1
  165. package/dist/search/scope.d.ts.map +1 -1
  166. package/dist/search/scope.js +3 -3
  167. package/dist/search/scope.js.map +1 -1
  168. package/dist/storage/encrypted-chunks.d.ts +7 -1
  169. package/dist/storage/encrypted-chunks.d.ts.map +1 -1
  170. package/dist/storage/encrypted-chunks.js +187 -75
  171. package/dist/storage/encrypted-chunks.js.map +1 -1
  172. package/dist/symbols/extract.d.ts +2 -1
  173. package/dist/symbols/extract.d.ts.map +1 -1
  174. package/dist/symbols/extract.js +1 -1
  175. package/dist/symbols/extract.js.map +1 -1
  176. package/dist/symbols/graph.d.ts.map +1 -1
  177. package/dist/symbols/graph.js.map +1 -1
  178. package/dist/synthesis/conversational-synthesizer.d.ts.map +1 -1
  179. package/dist/synthesis/conversational-synthesizer.js +68 -76
  180. package/dist/synthesis/conversational-synthesizer.js.map +1 -1
  181. package/dist/synthesis/prompt-builder.d.ts +2 -0
  182. package/dist/synthesis/prompt-builder.d.ts.map +1 -1
  183. package/dist/synthesis/prompt-builder.js +80 -81
  184. package/dist/synthesis/prompt-builder.js.map +1 -1
  185. package/dist/synthesis/synthesizer.d.ts +11 -0
  186. package/dist/synthesis/synthesizer.d.ts.map +1 -1
  187. package/dist/synthesis/synthesizer.js +29 -1
  188. package/dist/synthesis/synthesizer.js.map +1 -1
  189. package/dist/tests/helpers/test-repo.d.ts +7 -0
  190. package/dist/tests/helpers/test-repo.d.ts.map +1 -0
  191. package/dist/tests/helpers/test-repo.js +21 -0
  192. package/dist/tests/helpers/test-repo.js.map +1 -0
  193. package/dist/tests/integration/index-search.integration.test.d.ts +2 -0
  194. package/dist/tests/integration/index-search.integration.test.d.ts.map +1 -0
  195. package/dist/tests/integration/index-search.integration.test.js +163 -0
  196. package/dist/tests/integration/index-search.integration.test.js.map +1 -0
  197. package/dist/tests/simple-lru.test.js +22 -10
  198. package/dist/tests/simple-lru.test.js.map +1 -1
  199. package/dist/utils/cli-ui.d.ts.map +1 -1
  200. package/dist/utils/cli-ui.js +9 -3
  201. package/dist/utils/cli-ui.js.map +1 -1
  202. package/dist/utils/logger.d.ts +4 -1
  203. package/dist/utils/logger.d.ts.map +1 -1
  204. package/dist/utils/logger.js.map +1 -1
  205. package/dist/utils/path-helpers.d.ts.map +1 -1
  206. package/dist/utils/path-helpers.js +17 -1
  207. package/dist/utils/path-helpers.js.map +1 -1
  208. package/dist/utils/scan-patterns.d.ts.map +1 -1
  209. package/dist/utils/scan-patterns.js +1 -2
  210. package/dist/utils/scan-patterns.js.map +1 -1
  211. package/dist/utils/simple-lru.d.ts +21 -2
  212. package/dist/utils/simple-lru.d.ts.map +1 -1
  213. package/dist/utils/simple-lru.js +126 -19
  214. package/dist/utils/simple-lru.js.map +1 -1
  215. package/package.json +13 -4
@@ -0,0 +1,133 @@
1
+ import path from 'path';
2
+ import fs from 'fs';
3
+ import { createEmbeddingProvider, getModelProfile, getSizeLimits } from '../../providers/index.js';
4
+ import { BATCH_SIZE } from '../../providers/base.js';
5
+ import { readCodemap } from '../../codemap/io.js';
6
+ import { loadMerkle, cloneMerkle } from '../../indexer/merkle.js';
7
+ import { resolveEncryptionPreference } from '../../storage/encrypted-chunks.js';
8
+ import { Database, initDatabase } from '../../database/db.js';
9
+ import { BatchEmbeddingProcessor } from '../batch-indexer.js';
10
+ import { logger } from '../../utils/logger.js';
11
+ import { resolveProviderContext } from '../../config/resolver.js';
12
/**
 * IndexContext prepares the indexing environment by:
 * - Validating repository
 * - Initializing embedding provider
 * - Setting up database
 * - Loading codemap and merkle tree
 * - Creating batch processor
 */
export class IndexContext {
    /**
     * Resolve every resource an indexing run needs and return them together
     * as one plain context object.
     *
     * @param {object} options
     * @param {string} [options.repoPath='.'] Repository root; resolved to an absolute path.
     * @param {string} [options.provider='auto'] Provider identifier passed to createEmbeddingProvider.
     * @param {*} [options.changedFiles=null] Any non-null value marks the run as a partial update.
     * @param {?object} [options.embeddingProviderOverride=null] Ready-made provider; used as-is
     *   (its init() is NOT called here).
     * @param {*} [options.encryptMode] Forwarded to resolveEncryptionPreference as `mode`.
     * @returns {Promise<object>} Context holding the resolved paths, provider details, codemap,
     *   merkle trees, open database handle, batch processor and the isPartialUpdate flag.
     * @throws {Error} When the repository path cannot be read or is not a directory.
     */
    static async prepare(options) {
        const {
            repoPath = '.',
            provider = 'auto',
            changedFiles = null,
            embeddingProviderOverride = null,
            encryptMode = undefined,
        } = options;
        const repo = path.resolve(repoPath);

        // Validate repository: it must exist AND be a directory. A plain
        // existence check would let a regular file through and fail much
        // later with an unrelated error.
        let repoStats;
        try {
            repoStats = await fs.promises.stat(repo);
        } catch {
            throw new Error(`Directory ${repo} does not exist`);
        }
        if (!repoStats.isDirectory()) {
            throw new Error(`Path ${repo} is not a directory`);
        }

        // Setup provider context and embedding provider
        const providerContext = resolveProviderContext(repo);
        const providerInstance = embeddingProviderOverride ||
            createEmbeddingProvider(provider, providerContext.embedding);
        // An override is used exactly as supplied; only providers created here are initialised.
        if (!embeddingProviderOverride && providerInstance.init) {
            await providerInstance.init();
        }

        // Get provider and model information
        const providerName = providerInstance.getName();
        const modelName = providerInstance.getModelName ? providerInstance.getModelName() : null;
        // Fall back to the provider name when the provider exposes no model name.
        const modelProfile = await getModelProfile(providerName, modelName || providerName);
        const limits = getSizeLimits(modelProfile);
        const dimensions = providerInstance.getDimensions();

        // Log configuration
        if (!process.env.CODEVAULT_QUIET) {
            logger.info(`Chunking Configuration`, {
                provider: providerName,
                model: modelName,
                dimensions,
                mode: limits.unit
            });
        }

        // Initialize database
        await initDatabase(dimensions, repo);

        // Setup paths
        const codemapPath = path.join(repo, 'codevault.codemap.json');
        const chunkDir = path.join(repo, '.codevault/chunks');
        const dbPath = path.join(repo, '.codevault/codevault.db');

        // Check for dimension mismatches
        await IndexContext.checkDimensionMismatch(dbPath, providerInstance);

        // Setup encryption
        // NOTE(review): the resolver is handed `console`, not the structured logger - confirm intended.
        const encryptionPreference = resolveEncryptionPreference({
            mode: encryptMode,
            logger: console
        });

        // Load existing state; updatedMerkle is a clone so the loaded tree stays untouched.
        const codemap = readCodemap(codemapPath);
        const merkle = loadMerkle(repo);
        const updatedMerkle = cloneMerkle(merkle);

        // Create database connection
        const db = new Database(dbPath);

        // Create batch processor. From here on the open handle is owned by
        // the returned context, so close it if construction fails.
        let batchProcessor;
        try {
            batchProcessor = new BatchEmbeddingProcessor(providerInstance, db, BATCH_SIZE);
        } catch (error) {
            try {
                db.close();
            } catch {
                // Best effort: the construction error is the one worth reporting.
            }
            throw error;
        }

        const isPartialUpdate = changedFiles !== null;
        return {
            repo,
            repoPath,
            provider,
            providerInstance,
            providerName,
            modelName,
            modelProfile,
            limits,
            codemapPath,
            chunkDir,
            dbPath,
            encryptionPreference,
            codemap,
            merkle,
            updatedMerkle,
            db,
            batchProcessor,
            isPartialUpdate
        };
    }

    /**
     * Check if there's a dimension or provider mismatch and warn user.
     *
     * Returns without doing anything when the database file is not accessible.
     * Otherwise opens it, compares every stored provider/dimension row with the
     * current provider, logs a warning on any difference and then waits two
     * seconds (presumably so the warning is noticed - confirm). Failures while
     * querying are logged at debug level and never propagate; the temporary
     * database handle is always closed.
     *
     * @param {string} dbPath Path of the database file.
     * @param {object} embeddingProvider Provider exposing getName() and getDimensions().
     * @returns {Promise<void>}
     */
    static async checkDimensionMismatch(dbPath, embeddingProvider) {
        try {
            await fs.promises.access(dbPath);
        } catch {
            return; // DB doesn't exist yet
        }

        const db = new Database(dbPath);
        try {
            const existingDimensions = await db.getExistingDimensions();
            if (existingDimensions.length > 0) {
                const currentProvider = embeddingProvider.getName();
                const currentDimensions = embeddingProvider.getDimensions();
                const hasMismatch = existingDimensions.some(row => row.embedding_provider !== currentProvider ||
                    row.embedding_dimensions !== currentDimensions);
                if (hasMismatch) {
                    logger.warn('Dimension/Provider Mismatch Detected!', {
                        existing: existingDimensions,
                        current: { provider: currentProvider, dimensions: currentDimensions },
                        recommendation: 'Full re-index recommended'
                    });
                    await new Promise(resolve => setTimeout(resolve, 2000));
                }
            }
        } catch (error) {
            logger.debug('Migration check encountered an error (continuing)', {
                error: error instanceof Error ? error.message : String(error)
            });
        } finally {
            db.close();
        }
    }
}
133
+ //# sourceMappingURL=IndexContext.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"IndexContext.js","sourceRoot":"","sources":["../../../src/core/indexing/IndexContext.ts"],"names":[],"mappings":"AAAA,OAAO,IAAI,MAAM,MAAM,CAAC;AACxB,OAAO,EAAE,MAAM,IAAI,CAAC;AACpB,OAAO,EAAE,uBAAuB,EAAE,eAAe,EAAE,aAAa,EAA0B,MAAM,0BAA0B,CAAC;AAC3H,OAAO,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AACrD,OAAO,EAAE,WAAW,EAAgB,MAAM,qBAAqB,CAAC;AAChE,OAAO,EAAE,UAAU,EAAE,WAAW,EAAmB,MAAM,yBAAyB,CAAC;AACnF,OAAO,EAAE,2BAA2B,EAAE,MAAM,mCAAmC,CAAC;AAChF,OAAO,EAAE,QAAQ,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAC9D,OAAO,EAAE,uBAAuB,EAAE,MAAM,qBAAqB,CAAC;AAC9D,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAC/C,OAAO,EAAE,sBAAsB,EAAE,MAAM,0BAA0B,CAAC;AAwBlE;;;;;;;GAOG;AACH,MAAM,OAAO,YAAY;IACvB,MAAM,CAAC,KAAK,CAAC,OAAO,CAAC,OAA4B;QAC/C,MAAM,EACJ,QAAQ,GAAG,GAAG,EACd,QAAQ,GAAG,MAAM,EACjB,YAAY,GAAG,IAAI,EACnB,yBAAyB,GAAG,IAAI,EAChC,WAAW,GAAG,SAAS,EACxB,GAAG,OAAO,CAAC;QAEZ,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,QAAQ,CAAC,CAAC;QAEpC,6BAA6B;QAC7B,IAAI,CAAC;YACH,MAAM,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,IAAI,CAAC,CAAC;QACjC,CAAC;QAAC,MAAM,CAAC;YACP,MAAM,IAAI,KAAK,CAAC,aAAa,IAAI,iBAAiB,CAAC,CAAC;QACtD,CAAC;QAED,gDAAgD;QAChD,MAAM,eAAe,GAAG,sBAAsB,CAAC,IAAI,CAAC,CAAC;QACrD,MAAM,gBAAgB,GAAG,yBAAyB;YAChD,uBAAuB,CAAC,QAAQ,EAAE,eAAe,CAAC,SAAS,CAAC,CAAC;QAE/D,IAAI,CAAC,yBAAyB,IAAI,gBAAgB,CAAC,IAAI,EAAE,CAAC;YACxD,MAAM,gBAAgB,CAAC,IAAI,EAAE,CAAC;QAChC,CAAC;QAED,qCAAqC;QACrC,MAAM,YAAY,GAAG,gBAAgB,CAAC,OAAO,EAAE,CAAC;QAChD,MAAM,SAAS,GAAG,gBAAgB,CAAC,YAAY,CAAC,CAAC,CAAC,gBAAgB,CAAC,YAAY,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC;QACzF,MAAM,YAAY,GAAG,MAAM,eAAe,CAAC,YAAY,EAAE,SAAS,IAAI,YAAY,CAAC,CAAC;QACpF,MAAM,MAAM,GAAG,aAAa,CAAC,YAAY,CAAC,CAAC;QAE3C,oBAAoB;QACpB,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,eAAe,EAAE,CAAC;YACjC,MAAM,CAAC,IAAI,CAAC,wBAAwB,EAAE;gBACpC,QAAQ,EAAE,YAAY;gBACtB,KAAK,EAAE,SAAS;gBAChB,UAAU,EAAE,gBAAgB,CAAC,aAAa,EAAE;gBAC5C,IAAI,EAAE,MAAM,CAAC,IAAI;aAClB,CAAC,CAAC;QACL,CAAC;QAED,sBAAsB;QACtB,MAAM,YAAY,CAAC,gBAAgB,CAAC,aAAa,EAAE,EAAE,IAAI,CAAC,CAAC;QAE3D,cAAc;QACd,MAAM,WAAW,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,wBAAwB,CAAC,CAAC;
QAC9D,MAAM,QAAQ,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,mBAAmB,CAAC,CAAC;QACtD,MAAM,MAAM,GAAG,IAAI,CAAC,IAAI,CAAC,IAAI,EAAE,yBAAyB,CAAC,CAAC;QAE1D,iCAAiC;QACjC,MAAM,YAAY,CAAC,sBAAsB,CAAC,MAAM,EAAE,gBAAgB,CAAC,CAAC;QAEpE,mBAAmB;QACnB,MAAM,oBAAoB,GAAG,2BAA2B,CAAC;YACvD,IAAI,EAAE,WAAW;YACjB,MAAM,EAAE,OAAO;SAChB,CAAC,CAAC;QAEH,sBAAsB;QACtB,MAAM,OAAO,GAAG,WAAW,CAAC,WAAW,CAAC,CAAC;QACzC,MAAM,MAAM,GAAG,UAAU,CAAC,IAAI,CAAC,CAAC;QAChC,MAAM,aAAa,GAAG,WAAW,CAAC,MAAM,CAAC,CAAC;QAE1C,6BAA6B;QAC7B,MAAM,EAAE,GAAG,IAAI,QAAQ,CAAC,MAAM,CAAC,CAAC;QAEhC,yBAAyB;QACzB,MAAM,cAAc,GAAG,IAAI,uBAAuB,CAAC,gBAAgB,EAAE,EAAE,EAAE,UAAU,CAAC,CAAC;QAErF,MAAM,eAAe,GAAG,YAAY,KAAK,IAAI,CAAC;QAE9C,OAAO;YACL,IAAI;YACJ,QAAQ;YACR,QAAQ;YACR,gBAAgB;YAChB,YAAY;YACZ,SAAS;YACT,YAAY;YACZ,MAAM;YACN,WAAW;YACX,QAAQ;YACR,MAAM;YACN,oBAAoB;YACpB,OAAO;YACP,MAAM;YACN,aAAa;YACb,EAAE;YACF,cAAc;YACd,eAAe;SAChB,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,MAAM,CAAC,KAAK,CAAC,sBAAsB,CACzC,MAAc,EACd,iBAAoC;QAEpC,IAAI,CAAC;YACH,MAAM,EAAE,CAAC,QAAQ,CAAC,MAAM,CAAC,MAAM,CAAC,CAAC;QACnC,CAAC;QAAC,MAAM,CAAC;YACP,OAAO,CAAC,uBAAuB;QACjC,CAAC;QAED,MAAM,EAAE,GAAG,IAAI,QAAQ,CAAC,MAAM,CAAC,CAAC;QAChC,IAAI,CAAC;YACH,MAAM,kBAAkB,GAAG,MAAM,EAAE,CAAC,qBAAqB,EAAE,CAAC;YAE5D,IAAI,kBAAkB,CAAC,MAAM,GAAG,CAAC,EAAE,CAAC;gBAClC,MAAM,eAAe,GAAG,iBAAiB,CAAC,OAAO,EAAE,CAAC;gBACpD,MAAM,iBAAiB,GAAG,iBAAiB,CAAC,aAAa,EAAE,CAAC;gBAE5D,MAAM,WAAW,GAAG,kBAAkB,CAAC,IAAI,CACzC,GAAG,CAAC,EAAE,CAAC,GAAG,CAAC,kBAAkB,KAAK,eAAe;oBAC1C,GAAG,CAAC,oBAAoB,KAAK,iBAAiB,CACtD,CAAC;gBAEF,IAAI,WAAW,EAAE,CAAC;oBAChB,MAAM,CAAC,IAAI,CAAC,uCAAuC,EAAE;wBACnD,QAAQ,EAAE,kBAAkB;wBAC5B,OAAO,EAAE,EAAE,QAAQ,EAAE,eAAe,EAAE,UAAU,EAAE,iBAAiB,EAAE;wBACrE,cAAc,EAAE,2BAA2B;qBAC5C,CAAC,CAAC;oBAEH,MAAM,IAAI,OAAO,CAAC,OAAO,CAAC,EAAE,CAAC,UAAU,CAAC,OAAO,EAAE,IAAI,CAAC,CAAC,CAAC;gBAC1D,CAAC;YACH,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,MAAM,CAAC,KAAK,CAAC,mDAAmD,EAAE;gBAChE,KAAK,EAAE,KAAK,YAAY,KAAK,CAAC,CAAC,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,CAAC,MAAM,CAAC,KAAK,CAAC;aAC9D,CAAC,CAAC;QACL,CAAC;gBAAS,CAAC;YACT,EAAE,CAA
C,KAAK,EAAE,CAAC;QACb,CAAC;IACH,CAAC;CACF"}
@@ -0,0 +1,39 @@
1
+ import type { IndexContextData } from './IndexContext.js';
2
+ import type { IndexState } from './IndexState.js';
3
+ import type { IndexProjectResult } from '../types.js';
4
+ /**
5
+ * IndexFinalizationStage handles the finalization of the indexing process:
6
+ * - Flushing batch processor
7
+ * - Saving merkle tree (only when the index state marks it dirty)
8
+ * - Building symbol graph
9
+ * - Writing codemap
10
+ * - Building result object
11
+ * - Cleaning up resources
12
+ */
13
+ export declare class IndexFinalizationStage {
14
+ private context;
15
+ private state;
16
+ private onProgress;
17
+ constructor(context: IndexContextData, state: IndexState, onProgress: ((event: any) => void) | null);
18
+ /**
19
+ * Finalize the indexing process and resolve with the result object; cleanup runs even if a step throws
20
+ */
21
+ finalize(): Promise<IndexProjectResult>;
22
+ /**
23
+ * Flush the batch processor; a failure is recorded on the index state as a finalize_error instead of being thrown
24
+ */
25
+ private flushBatchProcessor;
26
+ /**
27
+ * Log chunking and processing statistics (skipped when the CODEVAULT_QUIET environment variable is set)
28
+ */
29
+ private logStatistics;
30
+ /**
31
+ * Build the final result object (tokenStats is included only when the model profile uses tokens)
32
+ */
33
+ private buildResult;
34
+ /**
35
+ * Clean up resources: closes the database connection, recording a db_close_error on the index state on failure
36
+ */
37
+ private cleanup;
38
+ }
39
+ //# sourceMappingURL=IndexFinalizationStage.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"IndexFinalizationStage.d.ts","sourceRoot":"","sources":["../../../src/core/indexing/IndexFinalizationStage.ts"],"names":[],"mappings":"AAKA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,mBAAmB,CAAC;AAC1D,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,iBAAiB,CAAC;AAClD,OAAO,KAAK,EAAE,kBAAkB,EAAE,MAAM,aAAa,CAAC;AAEtD;;;;;;;;GAQG;AACH,qBAAa,sBAAsB;IAE/B,OAAO,CAAC,OAAO;IACf,OAAO,CAAC,KAAK;IACb,OAAO,CAAC,UAAU;gBAFV,OAAO,EAAE,gBAAgB,EACzB,KAAK,EAAE,UAAU,EACjB,UAAU,EAAE,CAAC,CAAC,KAAK,EAAE,GAAG,KAAK,IAAI,CAAC,GAAG,IAAI;IAGnD;;OAEG;IACG,QAAQ,IAAI,OAAO,CAAC,kBAAkB,CAAC;IAiC7C;;OAEG;YACW,mBAAmB;IAajC;;OAEG;IACH,OAAO,CAAC,aAAa;IAUrB;;OAEG;IACH,OAAO,CAAC,WAAW;IAYnB;;OAEG;YACW,OAAO;CAatB"}
@@ -0,0 +1,114 @@
1
+ import { saveMerkle } from '../../indexer/merkle.js';
2
+ import { writeCodemap } from '../../codemap/io.js';
3
+ import { attachSymbolGraphToCodemap } from '../../symbols/graph.js';
4
+ import { getTokenCountStats } from '../../chunking/token-counter.js';
5
+ import { logger } from '../../utils/logger.js';
6
/**
 * IndexFinalizationStage handles the finalization of the indexing process:
 * - Flushing batch processor
 * - Saving merkle tree
 * - Building symbol graph
 * - Writing codemap
 * - Building result object
 * - Cleaning up resources
 */
export class IndexFinalizationStage {
    context;
    state;
    onProgress;

    /**
     * @param {object} context Indexing context (repo, codemapPath, db, batchProcessor,
     *   providerInstance and modelProfile are read here).
     * @param {object} state Mutable index state (codemap, updatedMerkle, merkleDirty,
     *   counters, errors and addError() are used here).
     * @param {?function(object): void} onProgress Optional progress callback.
     */
    constructor(context, state, onProgress) {
        this.context = context;
        this.state = state;
        this.onProgress = onProgress;
    }

    /**
     * Finalize the indexing process.
     *
     * Emits a `finalizing` progress event, flushes pending embeddings, saves
     * the merkle tree when it is dirty, attaches the symbol graph, writes the
     * codemap and builds the result. Cleanup runs in `finally`, so the
     * database is closed even when one of these steps throws.
     *
     * @returns {Promise<object>} The result object produced by buildResult().
     */
    async finalize() {
        try {
            // Notify progress
            if (this.onProgress) {
                this.onProgress({ type: 'finalizing' });
            }
            // Flush any remaining embeddings
            await this.flushBatchProcessor();
            // Save merkle tree if modified
            if (this.state.merkleDirty) {
                saveMerkle(this.context.repo, this.state.updatedMerkle);
            }
            // Build symbol graph and write codemap; keep whatever writeCodemap returns as the current codemap
            attachSymbolGraphToCodemap(this.state.codemap);
            this.state.codemap = writeCodemap(this.context.codemapPath, this.state.codemap);
            // Get token statistics
            const tokenStats = getTokenCountStats();
            // Log statistics
            this.logStatistics();
            // Build result
            return this.buildResult(tokenStats);
        } finally {
            // Clean up resources
            await this.cleanup();
        }
    }

    /**
     * Flush the batch processor. A failure is recorded on the state as a
     * `finalize_error` rather than thrown, so finalization continues.
     *
     * @returns {Promise<void>}
     */
    async flushBatchProcessor() {
        try {
            if (this.context.batchProcessor) {
                await this.context.batchProcessor.flush();
            }
        } catch (error) {
            this.state.addError({
                type: 'finalize_error',
                error: describeError(error)
            });
        }
    }

    /**
     * Log chunking and processing statistics unless CODEVAULT_QUIET is set.
     */
    logStatistics() {
        if (!process.env.CODEVAULT_QUIET) {
            logger.info('Chunking Statistics', {
                stats: { ...this.state.chunkingStats },
                processedChunks: this.state.processedChunks,
                totalChunks: Object.keys(this.state.codemap).length
            });
        }
    }

    /**
     * Build the final result object.
     *
     * @param {*} tokenStats Token statistics; only included when the model profile uses tokens.
     * @returns {object} Result with success, processedChunks, totalChunks, provider, errors,
     *   chunkingStats and tokenStats.
     */
    buildResult(tokenStats) {
        return {
            success: true,
            processedChunks: this.state.processedChunks,
            totalChunks: Object.keys(this.state.codemap).length,
            provider: this.context.providerInstance.getName(),
            errors: this.state.errors,
            chunkingStats: this.state.chunkingStats,
            tokenStats: this.context.modelProfile.useTokens ? tokenStats : undefined
        };
    }

    /**
     * Clean up resources. Closing the database never throws from here: a
     * failure is recorded on the state as a `db_close_error`. This matters
     * because cleanup runs inside `finally` and a throw would replace the
     * result (or the original error) of finalize().
     *
     * @returns {Promise<void>}
     */
    async cleanup() {
        // Close database connection
        try {
            if (this.context.db) {
                this.context.db.close();
            }
        } catch (error) {
            this.state.addError({
                type: 'db_close_error',
                error: describeError(error)
            });
        }
    }
}

/**
 * Turn any thrown value into a printable message. Thrown values are not
 * guaranteed to be Error instances (they may even be null or undefined), so
 * reading `.message` directly can yield undefined or throw.
 */
function describeError(error) {
    return error instanceof Error ? error.message : String(error);
}
114
+ //# sourceMappingURL=IndexFinalizationStage.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"IndexFinalizationStage.js","sourceRoot":"","sources":["../../../src/core/indexing/IndexFinalizationStage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AACrD,OAAO,EAAE,YAAY,EAAE,MAAM,qBAAqB,CAAC;AACnD,OAAO,EAAE,0BAA0B,EAAE,MAAM,wBAAwB,CAAC;AACpE,OAAO,EAAE,kBAAkB,EAAE,MAAM,iCAAiC,CAAC;AACrE,OAAO,EAAE,MAAM,EAAE,MAAM,uBAAuB,CAAC;AAK/C;;;;;;;;GAQG;AACH,MAAM,OAAO,sBAAsB;IAEvB;IACA;IACA;IAHV,YACU,OAAyB,EACzB,KAAiB,EACjB,UAAyC;QAFzC,YAAO,GAAP,OAAO,CAAkB;QACzB,UAAK,GAAL,KAAK,CAAY;QACjB,eAAU,GAAV,UAAU,CAA+B;IAChD,CAAC;IAEJ;;OAEG;IACH,KAAK,CAAC,QAAQ;QACZ,IAAI,CAAC;YACH,kBAAkB;YAClB,IAAI,IAAI,CAAC,UAAU,EAAE,CAAC;gBACpB,IAAI,CAAC,UAAU,CAAC,EAAE,IAAI,EAAE,YAAY,EAAE,CAAC,CAAC;YAC1C,CAAC;YAED,iCAAiC;YACjC,MAAM,IAAI,CAAC,mBAAmB,EAAE,CAAC;YAEjC,+BAA+B;YAC/B,IAAI,IAAI,CAAC,KAAK,CAAC,WAAW,EAAE,CAAC;gBAC3B,UAAU,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,IAAI,CAAC,KAAK,CAAC,aAAa,CAAC,CAAC;YAC1D,CAAC;YAED,uCAAuC;YACvC,0BAA0B,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YAC/C,IAAI,CAAC,KAAK,CAAC,OAAO,GAAG,YAAY,CAAC,IAAI,CAAC,OAAO,CAAC,WAAW,EAAE,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC;YAEhF,uBAAuB;YACvB,MAAM,UAAU,GAAG,kBAAkB,EAAE,CAAC;YAExC,iBAAiB;YACjB,IAAI,CAAC,aAAa,EAAE,CAAC;YAErB,eAAe;YACf,OAAO,IAAI,CAAC,WAAW,CAAC,UAAU,CAAC,CAAC;QACtC,CAAC;gBAAS,CAAC;YACT,qBAAqB;YACrB,MAAM,IAAI,CAAC,OAAO,EAAE,CAAC;QACvB,CAAC;IACH,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,mBAAmB;QAC/B,IAAI,CAAC;YACH,IAAI,IAAI,CAAC,OAAO,CAAC,cAAc,EAAE,CAAC;gBAChC,MAAM,IAAI,CAAC,OAAO,CAAC,cAAc,CAAC,KAAK,EAAE,CAAC;YAC5C,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC;gBAClB,IAAI,EAAE,gBAAgB;gBACtB,KAAK,EAAG,KAAe,CAAC,OAAO;aAChC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED;;OAEG;IACK,aAAa;QACnB,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,eAAe,EAAE,CAAC;YACjC,MAAM,CAAC,IAAI,CAAC,qBAAqB,EAAE;gBACjC,KAAK,EAAE,EAAE,GAAG,IAAI,CAAC,KAAK,CAAC,aAAa,EAAE;gBACtC,eAAe,EAAE,IAAI,CAAC,KAAK,CAAC,eAAe;gBAC3C,WAAW,EAAE,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,MAAM;aACpD,CAAC,CAAC;QACL,CAAC;IACH,CAAC;IAED;;
OAEG;IACK,WAAW,CAAC,UAAe;QACjC,OAAO;YACL,OAAO,EAAE,IAAI;YACb,eAAe,EAAE,IAAI,CAAC,KAAK,CAAC,eAAe;YAC3C,WAAW,EAAE,MAAM,CAAC,IAAI,CAAC,IAAI,CAAC,KAAK,CAAC,OAAO,CAAC,CAAC,MAAM;YACnD,QAAQ,EAAE,IAAI,CAAC,OAAO,CAAC,gBAAgB,CAAC,OAAO,EAAE;YACjD,MAAM,EAAE,IAAI,CAAC,KAAK,CAAC,MAAM;YACzB,aAAa,EAAE,IAAI,CAAC,KAAK,CAAC,aAAa;YACvC,UAAU,EAAE,IAAI,CAAC,OAAO,CAAC,YAAY,CAAC,SAAS,CAAC,CAAC,CAAC,UAAU,CAAC,CAAC,CAAC,SAAS;SACzE,CAAC;IACJ,CAAC;IAED;;OAEG;IACK,KAAK,CAAC,OAAO;QACnB,4BAA4B;QAC5B,IAAI,CAAC;YACH,IAAI,IAAI,CAAC,OAAO,CAAC,EAAE,EAAE,CAAC;gBACpB,IAAI,CAAC,OAAO,CAAC,EAAE,CAAC,KAAK,EAAE,CAAC;YAC1B,CAAC;QACH,CAAC;QAAC,OAAO,KAAK,EAAE,CAAC;YACf,IAAI,CAAC,KAAK,CAAC,QAAQ,CAAC;gBAClB,IAAI,EAAE,gBAAgB;gBACtB,KAAK,EAAG,KAAe,CAAC,OAAO;aAChC,CAAC,CAAC;QACL,CAAC;IACH,CAAC;CACF"}
@@ -0,0 +1,50 @@
1
+ import type { Codemap } from '../../codemap/io.js';
2
+ import type { MerkleTree } from '../../indexer/merkle.js';
3
+ import type { ChunkingStats } from '../types.js';
4
+ /**
5
+ * IndexState tracks mutable state during the indexing process (codemap, working merkle tree, dirty flags, counters and errors)
6
+ */
7
+ export declare class IndexState {
8
+ codemap: Codemap;
9
+ updatedMerkle: MerkleTree;
10
+ merkleDirty: boolean;
11
+ indexMutated: boolean;
12
+ processedChunks: number;
13
+ errors: any[];
14
+ chunkingStats: ChunkingStats;
15
+ constructor(codemap: Codemap, updatedMerkle: MerkleTree);
16
+ /**
17
+ * Append an error record to the errors list
18
+ */
19
+ addError(error: {
20
+ type: string;
21
+ file?: string;
22
+ chunkId?: string;
23
+ error: string;
24
+ }): void;
25
+ /**
26
+ * Mark merkle tree as modified (sets merkleDirty to true)
27
+ */
28
+ markMerkleDirty(): void;
29
+ /**
30
+ * Mark index as modified (sets indexMutated to true)
31
+ */
32
+ markIndexMutated(): void;
33
+ /**
34
+ * Increment processed chunk counter by one
35
+ */
36
+ incrementProcessedChunks(): void;
37
+ /**
38
+ * Merge the given partial stats into chunkingStats (shallow merge)
39
+ */
40
+ updateChunkingStats(stats: Partial<ChunkingStats>): void;
41
+ /**
42
+ * Get current error count (length of the errors list)
43
+ */
44
+ getErrorCount(): number;
45
+ /**
46
+ * Check if the index or the merkle tree was modified (indexMutated or merkleDirty)
47
+ */
48
+ wasModified(): boolean;
49
+ }
50
+ //# sourceMappingURL=IndexState.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"IndexState.d.ts","sourceRoot":"","sources":["../../../src/core/indexing/IndexState.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,qBAAqB,CAAC;AACnD,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,yBAAyB,CAAC;AAC1D,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAEjD;;GAEG;AACH,qBAAa,UAAU;IACrB,OAAO,EAAE,OAAO,CAAC;IACjB,aAAa,EAAE,UAAU,CAAC;IAC1B,WAAW,UAAS;IACpB,YAAY,UAAS;IACrB,eAAe,SAAK;IACpB,MAAM,EAAE,GAAG,EAAE,CAAM;IACnB,aAAa,EAAE,aAAa,CAO1B;gBAEU,OAAO,EAAE,OAAO,EAAE,aAAa,EAAE,UAAU;IAKvD;;OAEG;IACH,QAAQ,CAAC,KAAK,EAAE;QAAE,IAAI,EAAE,MAAM,CAAC;QAAC,IAAI,CAAC,EAAE,MAAM,CAAC;QAAC,OAAO,CAAC,EAAE,MAAM,CAAC;QAAC,KAAK,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI;IAIvF;;OAEG;IACH,eAAe,IAAI,IAAI;IAIvB;;OAEG;IACH,gBAAgB,IAAI,IAAI;IAIxB;;OAEG;IACH,wBAAwB,IAAI,IAAI;IAIhC;;OAEG;IACH,mBAAmB,CAAC,KAAK,EAAE,OAAO,CAAC,aAAa,CAAC,GAAG,IAAI;IAIxD;;OAEG;IACH,aAAa,IAAI,MAAM;IAIvB;;OAEG;IACH,WAAW,IAAI,OAAO;CAGvB"}
@@ -0,0 +1,66 @@
1
/**
 * IndexState tracks mutable state during the indexing process: the codemap
 * being built, the working copy of the merkle tree, dirty flags, the
 * processed-chunk counter, collected errors and chunking statistics.
 */
export class IndexState {
    codemap;
    updatedMerkle;
    merkleDirty = false;
    indexMutated = false;
    processedChunks = 0;
    errors = [];
    chunkingStats = {
        totalNodes: 0,
        skippedSmall: 0,
        subdivided: 0,
        statementFallback: 0,
        normalChunks: 0,
        mergedSmall: 0
    };

    /**
     * @param {object} codemap Codemap the run starts from.
     * @param {object} updatedMerkle Merkle tree the run is allowed to modify.
     */
    constructor(codemap, updatedMerkle) {
        this.codemap = codemap;
        this.updatedMerkle = updatedMerkle;
    }

    /**
     * Add an error to the error list.
     *
     * @param {{type: string, file?: string, chunkId?: string, error: string}} error
     */
    addError(error) {
        this.errors.push(error);
    }

    /**
     * Mark merkle tree as modified.
     */
    markMerkleDirty() {
        this.merkleDirty = true;
    }

    /**
     * Mark index as modified.
     */
    markIndexMutated() {
        this.indexMutated = true;
    }

    /**
     * Increment processed chunk counter.
     */
    incrementProcessedChunks() {
        this.processedChunks++;
    }

    /**
     * Update chunking stats by shallow-merging `stats` into the current
     * values. Keys whose value is `undefined` are ignored: a partial object
     * may carry such keys, and letting them through would replace a numeric
     * counter with `undefined`.
     *
     * @param {object} [stats] Partial chunking statistics; null/undefined is a no-op.
     */
    updateChunkingStats(stats) {
        const definedEntries = Object.entries(stats ?? {})
            .filter(([, value]) => value !== undefined);
        this.chunkingStats = { ...this.chunkingStats, ...Object.fromEntries(definedEntries) };
    }

    /**
     * Get current error count.
     *
     * @returns {number}
     */
    getErrorCount() {
        return this.errors.length;
    }

    /**
     * Check if index was modified (either the index itself or the merkle tree).
     *
     * @returns {boolean}
     */
    wasModified() {
        return this.indexMutated || this.merkleDirty;
    }
}
66
+ //# sourceMappingURL=IndexState.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"IndexState.js","sourceRoot":"","sources":["../../../src/core/indexing/IndexState.ts"],"names":[],"mappings":"AAIA;;GAEG;AACH,MAAM,OAAO,UAAU;IACrB,OAAO,CAAU;IACjB,aAAa,CAAa;IAC1B,WAAW,GAAG,KAAK,CAAC;IACpB,YAAY,GAAG,KAAK,CAAC;IACrB,eAAe,GAAG,CAAC,CAAC;IACpB,MAAM,GAAU,EAAE,CAAC;IACnB,aAAa,GAAkB;QAC7B,UAAU,EAAE,CAAC;QACb,YAAY,EAAE,CAAC;QACf,UAAU,EAAE,CAAC;QACb,iBAAiB,EAAE,CAAC;QACpB,YAAY,EAAE,CAAC;QACf,WAAW,EAAE,CAAC;KACf,CAAC;IAEF,YAAY,OAAgB,EAAE,aAAyB;QACrD,IAAI,CAAC,OAAO,GAAG,OAAO,CAAC;QACvB,IAAI,CAAC,aAAa,GAAG,aAAa,CAAC;IACrC,CAAC;IAED;;OAEG;IACH,QAAQ,CAAC,KAAuE;QAC9E,IAAI,CAAC,MAAM,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;IAC1B,CAAC;IAED;;OAEG;IACH,eAAe;QACb,IAAI,CAAC,WAAW,GAAG,IAAI,CAAC;IAC1B,CAAC;IAED;;OAEG;IACH,gBAAgB;QACd,IAAI,CAAC,YAAY,GAAG,IAAI,CAAC;IAC3B,CAAC;IAED;;OAEG;IACH,wBAAwB;QACtB,IAAI,CAAC,eAAe,EAAE,CAAC;IACzB,CAAC;IAED;;OAEG;IACH,mBAAmB,CAAC,KAA6B;QAC/C,IAAI,CAAC,aAAa,GAAG,EAAE,GAAG,IAAI,CAAC,aAAa,EAAE,GAAG,KAAK,EAAE,CAAC;IAC3D,CAAC;IAED;;OAEG;IACH,aAAa;QACX,OAAO,IAAI,CAAC,MAAM,CAAC,MAAM,CAAC;IAC5B,CAAC;IAED;;OAEG;IACH,WAAW;QACT,OAAO,IAAI,CAAC,YAAY,IAAI,IAAI,CAAC,WAAW,CAAC;IAC/C,CAAC;CACF"}
@@ -1,3 +1,6 @@
1
+ import Parser from 'tree-sitter';
2
+ import { type NodeGroup } from '../../chunking/file-grouper.js';
3
+ import type { TreeSitterNode } from '../../types/ast.js';
1
4
  import type { LanguageRule } from '../../languages/rules.js';
2
5
  import type { ModelProfile } from '../../providers/base.js';
3
6
  type SizeLimits = {
@@ -7,6 +10,10 @@ type SizeLimits = {
7
10
  overlap: number;
8
11
  unit: string;
9
12
  };
13
+ export interface OversizedChunk {
14
+ code: string;
15
+ part: number;
16
+ }
10
17
  interface ExistingChunks {
11
18
  staleChunkIds: Set<string>;
12
19
  existingChunks: Map<string, any>;
@@ -26,12 +33,41 @@ interface EmbedStoreParams {
26
33
  contextInfo: any;
27
34
  symbolData: any;
28
35
  }
29
- export declare class ChunkPipeline {
36
+ /**
37
+ * Collects candidate AST nodes for chunking using a reusable parser instance.
38
+ */
39
+ export declare class ASTTraverser {
30
40
  private parser;
41
+ constructor(parser?: Parser);
42
+ collectNodesForFile(source: string, rule: LanguageRule): TreeSitterNode[];
43
+ private buildTree;
44
+ }
45
+ export interface OverlapStrategy {
46
+ split(node: TreeSitterNode, source: string, limits: SizeLimits, profile: ModelProfile): Promise<OversizedChunk[]>;
47
+ }
48
+ /**
49
+ * Default overlap strategy that falls back to statement-level chunking with 20% overlap.
50
+ */
51
+ export declare class StatementOverlapStrategy implements OverlapStrategy {
52
+ split(node: TreeSitterNode, source: string, limits: SizeLimits, profile: ModelProfile): Promise<OversizedChunk[]>;
53
+ }
54
+ export declare class ChunkGrouper {
55
+ groupNodes(nodes: TreeSitterNode[], source: string, profile: ModelProfile, rule: LanguageRule): Promise<NodeGroup[]>;
56
+ }
57
+ export interface ChunkPipelineDependencies {
58
+ traverser?: ASTTraverser;
59
+ chunkGrouper?: ChunkGrouper;
60
+ overlapStrategy?: OverlapStrategy;
61
+ }
62
+ export declare class ChunkPipeline {
31
63
  private processedNodes;
32
- constructor();
64
+ private traverser;
65
+ private chunkGrouper;
66
+ private overlapStrategy;
67
+ constructor(deps?: ChunkPipelineDependencies);
33
68
  collectNodesForFile(source: string, rule: LanguageRule): Promise<import("tree-sitter").SyntaxNode[]>;
34
- processGroups(nodeGroups: any[], source: string, rule: LanguageRule, limits: SizeLimits, modelProfile: ModelProfile, rel: string, existing: ExistingChunks, chunkMerkleHashes: string[], onProgress: any, embedAndStore: (params: EmbedStoreParams) => Promise<void>, chunkingStats: any): Promise<void>;
69
+ groupNodes(nodes: TreeSitterNode[], source: string, profile: ModelProfile, rule: LanguageRule): Promise<NodeGroup[]>;
70
+ processGroups(nodeGroups: NodeGroup[], source: string, rule: LanguageRule, limits: SizeLimits, modelProfile: ModelProfile, rel: string, existing: ExistingChunks, chunkMerkleHashes: string[], onProgress: any, embedAndStore: (params: EmbedStoreParams) => Promise<void>, chunkingStats: any): Promise<void>;
35
71
  private yieldChunk;
36
72
  private processChunk;
37
73
  }
@@ -1 +1 @@
1
- {"version":3,"file":"chunk-pipeline.d.ts","sourceRoot":"","sources":["../../../src/core/indexing/chunk-pipeline.ts"],"names":[],"mappings":"AAkBA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AAE5D,KAAK,UAAU,GAAG;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;CACd,CAAC;AAEF,UAAU,cAAc;IACtB,aAAa,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;IAC3B,cAAc,EAAE,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAClC;AAED,UAAU,gBAAgB;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,qBAAqB,EAAE,MAAM,CAAC;IAC9B,OAAO,EAAE,MAAM,CAAC;IAChB,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,iBAAiB,EAAE,GAAG,CAAC;IACvB,kBAAkB,EAAE,GAAG,EAAE,CAAC;IAC1B,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,WAAW,EAAE,GAAG,CAAC;IACjB,UAAU,EAAE,GAAG,CAAC;CACjB;AAED,qBAAa,aAAa;IACxB,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,cAAc,CAAqB;;IAMrC,mBAAmB,CAAC,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,YAAY;IA6DtD,aAAa,CACjB,UAAU,EAAE,GAAG,EAAE,EACjB,MAAM,EAAE,MAAM,EACd,IAAI,EAAE,YAAY,EAClB,MAAM,EAAE,UAAU,EAClB,YAAY,EAAE,YAAY,EAC1B,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,cAAc,EACxB,iBAAiB,EAAE,MAAM,EAAE,EAC3B,UAAU,EAAE,GAAG,EACf,aAAa,EAAE,CAAC,MAAM,EAAE,gBAAgB,KAAK,OAAO,CAAC,IAAI,CAAC,EAC1D,aAAa,EAAE,GAAG,GACjB,OAAO,CAAC,IAAI,CAAC;YAgCF,UAAU;YAqGV,YAAY;CAmF3B"}
1
+ {"version":3,"file":"chunk-pipeline.d.ts","sourceRoot":"","sources":["../../../src/core/indexing/chunk-pipeline.ts"],"names":[],"mappings":"AAGA,OAAO,MAAM,MAAM,aAAa,CAAC;AAEjC,OAAO,EAA8C,KAAK,SAAS,EAAE,MAAM,gCAAgC,CAAC;AAY5G,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,oBAAoB,CAAC;AACzD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,0BAA0B,CAAC;AAC7D,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,yBAAyB,CAAC;AAE5D,KAAK,UAAU,GAAG;IAChB,OAAO,EAAE,MAAM,CAAC;IAChB,GAAG,EAAE,MAAM,CAAC;IACZ,GAAG,EAAE,MAAM,CAAC;IACZ,OAAO,EAAE,MAAM,CAAC;IAChB,IAAI,EAAE,MAAM,CAAC;CACd,CAAC;AAEF,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,IAAI,EAAE,MAAM,CAAC;CACd;AAED,UAAU,cAAc;IACtB,aAAa,EAAE,GAAG,CAAC,MAAM,CAAC,CAAC;IAC3B,cAAc,EAAE,GAAG,CAAC,MAAM,EAAE,GAAG,CAAC,CAAC;CAClC;AAED,UAAU,gBAAgB;IACxB,IAAI,EAAE,MAAM,CAAC;IACb,qBAAqB,EAAE,MAAM,CAAC;IAC9B,OAAO,EAAE,MAAM,CAAC;IAChB,GAAG,EAAE,MAAM,CAAC;IACZ,IAAI,EAAE,MAAM,CAAC;IACb,GAAG,EAAE,MAAM,CAAC;IACZ,MAAM,EAAE,MAAM,CAAC;IACf,SAAS,EAAE,MAAM,CAAC;IAClB,iBAAiB,EAAE,GAAG,CAAC;IACvB,kBAAkB,EAAE,GAAG,EAAE,CAAC;IAC1B,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3B,WAAW,EAAE,GAAG,CAAC;IACjB,UAAU,EAAE,GAAG,CAAC;CACjB;AAED;;GAEG;AACH,qBAAa,YAAY;IACvB,OAAO,CAAC,MAAM,CAAS;gBAEX,MAAM,CAAC,EAAE,MAAM;IAI3B,mBAAmB,CAAC,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,YAAY,GAAG,cAAc,EAAE;IAkDzE,OAAO,CAAC,SAAS;CAWlB;AAED,MAAM,WAAW,eAAe;IAC9B,KAAK,CAAC,IAAI,EAAE,cAAc,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,OAAO,EAAE,YAAY,GAAG,OAAO,CAAC,cAAc,EAAE,CAAC,CAAC;CACnH;AAED;;GAEG;AACH,qBAAa,wBAAyB,YAAW,eAAe;IACxD,KAAK,CACT,IAAI,EAAE,cAAc,EACpB,MAAM,EAAE,MAAM,EACd,MAAM,EAAE,UAAU,EAClB,OAAO,EAAE,YAAY,GACpB,OAAO,CAAC,cAAc,EAAE,CAAC;CAO7B;AAED,qBAAa,YAAY;IACjB,UAAU,CACd,KAAK,EAAE,cAAc,EAAE,EACvB,MAAM,EAAE,MAAM,EACd,OAAO,EAAE,YAAY,EACrB,IAAI,EAAE,YAAY,GACjB,OAAO,CAAC,SAAS,EAAE,CAAC;CAGxB;AAED,MAAM,WAAW,yBAAyB;IACxC,SAAS,CAAC,EAAE,YAAY,CAAC;IACzB,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B,eAAe,CAAC,EAAE,eAAe,CAAC;CACnC;AAED,qBAAa,aAAa;IACxB,OAAO,CAAC,cAAc,CAAqB;IAC3C,OAAO,CAAC,SAAS,CAAe;IAChC,OAAO,CAAC,YAAY,CAAe;IACnC,OAAO,CAAC,eAAe,CAAkB;gBAE7B,IAAI,GAAE,yBA
A8B;IAM1C,mBAAmB,CAAC,MAAM,EAAE,MAAM,EAAE,IAAI,EAAE,YAAY;IAItD,UAAU,CAAC,KAAK,EAAE,cAAc,EAAE,EAAE,MAAM,EAAE,MAAM,EAAE,OAAO,EAAE,YAAY,EAAE,IAAI,EAAE,YAAY;IAI7F,aAAa,CACjB,UAAU,EAAE,SAAS,EAAE,EACvB,MAAM,EAAE,MAAM,EACd,IAAI,EAAE,YAAY,EAClB,MAAM,EAAE,UAAU,EAClB,YAAY,EAAE,YAAY,EAC1B,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,cAAc,EACxB,iBAAiB,EAAE,MAAM,EAAE,EAC3B,UAAU,EAAE,GAAG,EACf,aAAa,EAAE,CAAC,MAAM,EAAE,gBAAgB,KAAK,OAAO,CAAC,IAAI,CAAC,EAC1D,aAAa,EAAE,GAAG,GACjB,OAAO,CAAC,IAAI,CAAC;YAgCF,UAAU;YAgHV,YAAY;CAmF3B"}
@@ -1,33 +1,24 @@
1
- import Parser from 'tree-sitter';
2
1
  import crypto from 'crypto';
2
+ import Parser from 'tree-sitter';
3
3
  import { analyzeNodeForChunking, batchAnalyzeNodes, yieldStatementChunks } from '../../chunking/semantic-chunker.js';
4
- import { createCombinedChunk } from '../../chunking/file-grouper.js';
4
+ import { groupNodesForChunking, createCombinedChunk } from '../../chunking/file-grouper.js';
5
5
  import { extractSymbolMetadata } from '../../symbols/extract.js';
6
6
  import { extractSymbolName } from '../symbol-extractor.js';
7
7
  import { extractCodevaultMetadata, extractSemanticTags, extractImportantVariables, extractDocComments, generateEnhancedEmbeddingText } from '../metadata.js';
8
8
  import { computeFastHash } from '../../indexer/merkle.js';
9
9
  import { SIZE_THRESHOLD, CHUNK_SIZE } from '../../config/constants.js';
10
- export class ChunkPipeline {
10
+ /**
11
+ * Collects candidate AST nodes for chunking using a reusable parser instance.
12
+ */
13
+ export class ASTTraverser {
11
14
  parser;
12
- processedNodes = new Set();
13
- constructor() {
14
- this.parser = new Parser();
15
+ constructor(parser) {
16
+ this.parser = parser ?? new Parser();
15
17
  }
16
- async collectNodesForFile(source, rule) {
18
+ collectNodesForFile(source, rule) {
17
19
  this.parser.setLanguage(rule.ts);
18
- let tree;
19
- if (source.length > SIZE_THRESHOLD) {
20
- tree = this.parser.parse((index) => {
21
- if (index < source.length) {
22
- return source.slice(index, Math.min(index + CHUNK_SIZE, source.length));
23
- }
24
- return null;
25
- });
26
- }
27
- else {
28
- tree = this.parser.parse(source);
29
- }
30
- if (!tree || !tree.rootNode) {
20
+ const tree = this.buildTree(source);
21
+ if (!tree?.rootNode) {
31
22
  throw new Error('Failed to create syntax tree');
32
23
  }
33
24
  const collectedNodes = [];
@@ -68,6 +59,51 @@ export class ChunkPipeline {
68
59
  collectNodes(tree.rootNode);
69
60
  return collectedNodes;
70
61
  }
62
+ buildTree(source) {
63
+ if (source.length > SIZE_THRESHOLD) {
64
+ return this.parser.parse((index) => {
65
+ if (index < source.length) {
66
+ return source.slice(index, Math.min(index + CHUNK_SIZE, source.length));
67
+ }
68
+ return null;
69
+ });
70
+ }
71
+ return this.parser.parse(source);
72
+ }
73
+ }
74
+ /**
75
+ * Default overlap strategy that falls back to statement-level chunking with 20% overlap.
76
+ */
77
+ export class StatementOverlapStrategy {
78
+ async split(node, source, limits, profile) {
79
+ const statementChunks = await yieldStatementChunks(node, source, limits.max, limits.overlap, profile);
80
+ return statementChunks.map((chunk, index) => ({
81
+ code: chunk.code,
82
+ part: index + 1
83
+ }));
84
+ }
85
+ }
86
+ export class ChunkGrouper {
87
+ async groupNodes(nodes, source, profile, rule) {
88
+ return groupNodesForChunking(nodes, source, profile, rule);
89
+ }
90
+ }
91
+ export class ChunkPipeline {
92
+ processedNodes = new Set();
93
+ traverser;
94
+ chunkGrouper;
95
+ overlapStrategy;
96
+ constructor(deps = {}) {
97
+ this.traverser = deps.traverser ?? new ASTTraverser();
98
+ this.chunkGrouper = deps.chunkGrouper ?? new ChunkGrouper();
99
+ this.overlapStrategy = deps.overlapStrategy ?? new StatementOverlapStrategy();
100
+ }
101
+ async collectNodesForFile(source, rule) {
102
+ return this.traverser.collectNodesForFile(source, rule);
103
+ }
104
+ async groupNodes(nodes, source, profile, rule) {
105
+ return this.chunkGrouper.groupNodes(nodes, source, profile, rule);
106
+ }
71
107
  async processGroups(nodeGroups, source, rule, limits, modelProfile, rel, existing, chunkMerkleHashes, onProgress, embedAndStore, chunkingStats) {
72
108
  this.processedNodes = new Set();
73
109
  for (const nodeGroup of nodeGroups) {
@@ -139,10 +175,9 @@ export class ChunkPipeline {
139
175
  }
140
176
  else if (analysis.size > limits.max) {
141
177
  chunkingStats.statementFallback++;
142
- const statementChunks = await yieldStatementChunks(node, source, limits.max, limits.overlap, modelProfile);
143
- for (let i = 0; i < statementChunks.length; i++) {
144
- const stmtChunk = statementChunks[i];
145
- await this.processChunk(node, stmtChunk.code, `${i + 1}`, parentNode, source, rel, rule, existing, chunkMerkleHashes, onProgress, embedAndStore, chunkingStats);
178
+ const oversizedChunks = await this.overlapStrategy.split(node, source, limits, modelProfile);
179
+ for (const stmtChunk of oversizedChunks) {
180
+ await this.processChunk(node, stmtChunk.code, `${stmtChunk.part}`, parentNode, source, rel, rule, existing, chunkMerkleHashes, onProgress, embedAndStore, chunkingStats);
146
181
  }
147
182
  return;
148
183
  }