icopilot 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (203) hide show
  1. package/CHANGELOG.md +250 -0
  2. package/LICENSE +21 -0
  3. package/README.md +214 -0
  4. package/bin/icopilot.js +6 -0
  5. package/dist/acp/router.js +123 -0
  6. package/dist/acp/schema.js +53 -0
  7. package/dist/agents/aggregator.js +187 -0
  8. package/dist/agents/custom-agents.js +97 -0
  9. package/dist/agents/goal-driven.js +411 -0
  10. package/dist/agents/multi-repo.js +350 -0
  11. package/dist/agents/parallel-runner.js +181 -0
  12. package/dist/agents/router.js +144 -0
  13. package/dist/agents/self-heal.js +481 -0
  14. package/dist/agents/tdd-agent.js +278 -0
  15. package/dist/api/github-models.js +158 -0
  16. package/dist/bridge/ide-bridge.js +479 -0
  17. package/dist/cloud/routine-executor.js +34 -0
  18. package/dist/cloud/routine-scheduler.js +67 -0
  19. package/dist/cloud/routine-storage.js +297 -0
  20. package/dist/commands/acp-cmd.js +143 -0
  21. package/dist/commands/actions-cmd.js +624 -0
  22. package/dist/commands/agent-cmd.js +144 -0
  23. package/dist/commands/alias-cmd.js +132 -0
  24. package/dist/commands/bookmark-cmd.js +77 -0
  25. package/dist/commands/changelog-cmd.js +99 -0
  26. package/dist/commands/changes-cmd.js +120 -0
  27. package/dist/commands/clipboard-cmd.js +217 -0
  28. package/dist/commands/cloud-routine-cmd.js +265 -0
  29. package/dist/commands/codegen-cmd.js +544 -0
  30. package/dist/commands/compare-cmd.js +116 -0
  31. package/dist/commands/context-cmd.js +247 -0
  32. package/dist/commands/context-viz-cmd.js +43 -0
  33. package/dist/commands/conventions-cmd.js +116 -0
  34. package/dist/commands/cost-cmd.js +51 -0
  35. package/dist/commands/deps-cmd.js +294 -0
  36. package/dist/commands/diagram-cmd.js +658 -0
  37. package/dist/commands/diff-review-cmd.js +92 -0
  38. package/dist/commands/doc-cmd.js +412 -0
  39. package/dist/commands/doctor-cmd.js +152 -0
  40. package/dist/commands/editor-cmd.js +49 -0
  41. package/dist/commands/env-cmd.js +86 -0
  42. package/dist/commands/explain-cmd.js +78 -0
  43. package/dist/commands/explain-shell-cmd.js +22 -0
  44. package/dist/commands/explore-cmd.js +231 -0
  45. package/dist/commands/feedback-cmd.js +98 -0
  46. package/dist/commands/fix-cmd.js +17 -0
  47. package/dist/commands/generate-cmd.js +38 -0
  48. package/dist/commands/git-extra.js +197 -0
  49. package/dist/commands/git-log-cmd.js +98 -0
  50. package/dist/commands/git-undo-cmd.js +137 -0
  51. package/dist/commands/git.js +155 -0
  52. package/dist/commands/history-cmd.js +122 -0
  53. package/dist/commands/index-cmd.js +65 -0
  54. package/dist/commands/init-cmd.js +73 -0
  55. package/dist/commands/lint-cmd.js +133 -0
  56. package/dist/commands/memory-cmd.js +98 -0
  57. package/dist/commands/metrics-cmd.js +97 -0
  58. package/dist/commands/mode-prefix.js +30 -0
  59. package/dist/commands/multi-cmd.js +44 -0
  60. package/dist/commands/notify-cmd.js +204 -0
  61. package/dist/commands/profile-cmd.js +101 -0
  62. package/dist/commands/prompts.js +17 -0
  63. package/dist/commands/rag-cmd.js +60 -0
  64. package/dist/commands/readme-cmd.js +564 -0
  65. package/dist/commands/reasoning-cmd.js +34 -0
  66. package/dist/commands/refactor-cmd.js +96 -0
  67. package/dist/commands/release-cmd.js +450 -0
  68. package/dist/commands/repo-cmd.js +195 -0
  69. package/dist/commands/route-cmd.js +21 -0
  70. package/dist/commands/schedule-cmd.js +109 -0
  71. package/dist/commands/search-cmd.js +47 -0
  72. package/dist/commands/security-cmd.js +156 -0
  73. package/dist/commands/settings-cmd.js +238 -0
  74. package/dist/commands/skill-cmd.js +338 -0
  75. package/dist/commands/slash.js +2721 -0
  76. package/dist/commands/snippets-cmd.js +83 -0
  77. package/dist/commands/space-cmd.js +92 -0
  78. package/dist/commands/stash-cmd.js +156 -0
  79. package/dist/commands/stats-cmd.js +36 -0
  80. package/dist/commands/style-cmd.js +85 -0
  81. package/dist/commands/suggest-cmd.js +40 -0
  82. package/dist/commands/summary-cmd.js +138 -0
  83. package/dist/commands/task-cmd.js +58 -0
  84. package/dist/commands/team-memory-cmd.js +97 -0
  85. package/dist/commands/template-cmd.js +475 -0
  86. package/dist/commands/test-cmd.js +146 -0
  87. package/dist/commands/todo-cmd.js +172 -0
  88. package/dist/commands/tokens-cmd.js +277 -0
  89. package/dist/commands/trigger-cmd.js +147 -0
  90. package/dist/commands/undo-cmd.js +18 -0
  91. package/dist/commands/voice-cmd.js +89 -0
  92. package/dist/commands/watch-cmd.js +110 -0
  93. package/dist/commands/web-cmd.js +183 -0
  94. package/dist/commands/worktree-cmd.js +119 -0
  95. package/dist/config-profile.js +66 -0
  96. package/dist/config.js +288 -0
  97. package/dist/context/compactor.js +53 -0
  98. package/dist/context/dep-context.js +329 -0
  99. package/dist/context/file-refs.js +54 -0
  100. package/dist/context/git-context.js +229 -0
  101. package/dist/context/image-input.js +66 -0
  102. package/dist/context/memory.js +55 -0
  103. package/dist/context/persistent-memory.js +104 -0
  104. package/dist/context/pinned.js +96 -0
  105. package/dist/context/priority.js +150 -0
  106. package/dist/context/read-only.js +48 -0
  107. package/dist/context/smart-files.js +286 -0
  108. package/dist/context/team-memory.js +156 -0
  109. package/dist/extensions/loader.js +149 -0
  110. package/dist/extensions/marketplace.js +49 -0
  111. package/dist/extensions/slack-provider.js +181 -0
  112. package/dist/extensions/team.js +56 -0
  113. package/dist/extensions/teams-provider.js +222 -0
  114. package/dist/extensions/voice.js +18 -0
  115. package/dist/hooks/lifecycle.js +215 -0
  116. package/dist/hooks/precommit.js +463 -0
  117. package/dist/index/embeddings.js +23 -0
  118. package/dist/index/indexer.js +86 -0
  119. package/dist/index/retrieve.js +20 -0
  120. package/dist/index/store.js +95 -0
  121. package/dist/index.js +286 -0
  122. package/dist/intelligence/dead-code.js +457 -0
  123. package/dist/intelligence/error-watch.js +263 -0
  124. package/dist/intelligence/navigation.js +141 -0
  125. package/dist/intelligence/stack-trace.js +210 -0
  126. package/dist/intelligence/symbol-index.js +410 -0
  127. package/dist/knowledge/auto-memory.js +412 -0
  128. package/dist/knowledge/conventions.js +475 -0
  129. package/dist/knowledge/corrections.js +213 -0
  130. package/dist/knowledge/rag.js +450 -0
  131. package/dist/knowledge/style-learner.js +324 -0
  132. package/dist/logger.js +35 -0
  133. package/dist/mcp/client.js +144 -0
  134. package/dist/mcp/config.js +24 -0
  135. package/dist/mcp/index.js +89 -0
  136. package/dist/modes/auto-compact.js +20 -0
  137. package/dist/modes/autopilot.js +157 -0
  138. package/dist/modes/background.js +82 -0
  139. package/dist/modes/interactive.js +187 -0
  140. package/dist/modes/oneshot.js +36 -0
  141. package/dist/modes/tui.js +265 -0
  142. package/dist/modes/turn.js +342 -0
  143. package/dist/notifications/manager.js +107 -0
  144. package/dist/plugins/marketplace.js +244 -0
  145. package/dist/providers/custom-provider.js +298 -0
  146. package/dist/providers/local-model.js +121 -0
  147. package/dist/routing/profiles.js +44 -0
  148. package/dist/routing/router.js +18 -0
  149. package/dist/sandbox/container.js +151 -0
  150. package/dist/security/audit.js +237 -0
  151. package/dist/security/content-filter.js +449 -0
  152. package/dist/security/proxy.js +301 -0
  153. package/dist/security/retention.js +281 -0
  154. package/dist/security/roles.js +252 -0
  155. package/dist/server/api-server.js +679 -0
  156. package/dist/session/bookmarks.js +72 -0
  157. package/dist/session/cloud-session.js +291 -0
  158. package/dist/session/handoff.js +405 -0
  159. package/dist/session/manager.js +35 -0
  160. package/dist/session/session.js +296 -0
  161. package/dist/session/share.js +313 -0
  162. package/dist/session/undo-journal.js +91 -0
  163. package/dist/snippets/store.js +60 -0
  164. package/dist/spaces/space-config.js +156 -0
  165. package/dist/spaces/space.js +220 -0
  166. package/dist/stats/store.js +101 -0
  167. package/dist/tools/apply-patch.js +134 -0
  168. package/dist/tools/auto-check.js +218 -0
  169. package/dist/tools/diff-edit.js +150 -0
  170. package/dist/tools/diff-prompt.js +36 -0
  171. package/dist/tools/edit-file.js +66 -0
  172. package/dist/tools/file-ops.js +205 -0
  173. package/dist/tools/glob.js +17 -0
  174. package/dist/tools/grep.js +56 -0
  175. package/dist/tools/image.js +194 -0
  176. package/dist/tools/list-directory.js +228 -0
  177. package/dist/tools/memory.js +17 -0
  178. package/dist/tools/multi-edit.js +299 -0
  179. package/dist/tools/policy.js +95 -0
  180. package/dist/tools/registry.js +484 -0
  181. package/dist/tools/retry.js +74 -0
  182. package/dist/tools/run-in-terminal.js +162 -0
  183. package/dist/tools/safety.js +64 -0
  184. package/dist/tools/sandbox.js +15 -0
  185. package/dist/tools/search-symbols.js +212 -0
  186. package/dist/tools/shell.js +118 -0
  187. package/dist/tools/web.js +167 -0
  188. package/dist/ui/prompt.js +37 -0
  189. package/dist/ui/render.js +96 -0
  190. package/dist/ui/screen.js +13 -0
  191. package/dist/ui/theme.js +56 -0
  192. package/dist/util/browser.js +34 -0
  193. package/dist/util/completion.js +350 -0
  194. package/dist/util/cost.js +28 -0
  195. package/dist/util/keybindings.js +113 -0
  196. package/dist/util/lazy.js +26 -0
  197. package/dist/util/perf.js +25 -0
  198. package/dist/util/token-worker.js +11 -0
  199. package/dist/util/tokens.js +50 -0
  200. package/dist/workflows/builtins.js +128 -0
  201. package/dist/workflows/engine.js +496 -0
  202. package/dist/workflows/file-trigger.js +197 -0
  203. package/package.json +79 -0
@@ -0,0 +1,450 @@
1
+ import crypto from 'node:crypto';
2
+ import fs from 'node:fs';
3
+ import path from 'node:path';
4
+ import fg from 'fast-glob';
5
+ import { countTokensSync } from '../util/tokens.js';
6
/** Extensions indexed when the caller does not supply an `extensions` option. */
const DEFAULT_EXTENSIONS = [
    // Documentation and plain text.
    '.md', '.mdx', '.txt', '.rst', '.adoc',
    // JavaScript and TypeScript sources.
    '.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs',
    // Structured data.
    '.json', '.yml', '.yaml',
    // Other programming languages.
    '.py', '.go', '.java', '.rs', '.cs', '.rb',
];

/** Fallback for the `maxChunkTokens` option. */
const DEFAULT_MAX_CHUNK_TOKENS = 400;

/** Fallback for the `overlap` option. */
const DEFAULT_OVERLAP = 60;

/** Glob patterns excluded from project indexing. */
const DEFAULT_IGNORES = ['node_modules', 'dist', '.git', 'coverage', '.icopilot'].map(
    (directory) => `**/${directory}/**`,
);

/** Extensions whose files are segmented on declaration boundaries instead of paragraphs. */
const CODE_EXTENSIONS = new Set([
    '.ts', '.tsx', '.js', '.jsx', '.mjs', '.cjs',
    '.py', '.go', '.java', '.rs', '.cs', '.rb',
]);

/**
 * Matches a line that opens a new code segment. The three alternatives are
 * joined with `|`; each is anchored at the start of the line.
 */
const CODE_BOUNDARY_RE = new RegExp([
    // function / class / interface / type / enum declarations, optionally exported.
    String.raw`^\s*(?:export\s+)?(?:default\s+)?(?:async\s+)?(?:function|class|interface|type|enum)\b`,
    // `const name = (...) =>` or `const name = arg =>` arrow functions.
    String.raw`^\s*(?:export\s+)?const\s+[A-Za-z0-9_$]+\s*=\s*(?:async\s*)?(?:\([^)]*\)|[A-Za-z0-9_$]+)\s*=>`,
    // Methods introduced by an access modifier.
    String.raw`^\s*(?:public|private|protected)\s+(?:async\s+)?[A-Za-z0-9_$]+\s*\(`,
].join('|'));
52
/**
 * Builds the location of the persisted index file for a project.
 *
 * @param {string} rootDir - Project root directory.
 * @returns {string} `<rootDir>/.icopilot/rag-index.json`, joined with the platform separator.
 */
export function defaultRagIndexPath(rootDir) {
    const indexLocation = ['.icopilot', 'rag-index.json'];
    return path.join(rootDir, ...indexLocation);
}
55
/**
 * TF-IDF retrieval index over the text files of a project.
 *
 * `documents` is the source of truth: it maps a stored path (see
 * `toStoredPath`) to `{ id, path, content, chunks }`. Everything else
 * (`chunks`, the frequency tables, the per-chunk weight vectors and norms)
 * is derived state that `rebuildSearchModel` regenerates from `documents`.
 */
export class RAGIndex {
    /** Directory that stored document paths are made relative to. */
    rootDir = process.cwd();
    /** Normalized indexing options (`extensions`, `maxChunkTokens`, `overlap`). */
    options = normalizeOptions();
    /** Stored path -> document record. */
    documents = new Map();
    /** Chunk id -> chunk, across all documents. */
    chunks = new Map();
    /** Term -> number of chunks that contain the term. */
    documentFrequency = new Map();
    /** Term -> smoothed inverse document frequency. */
    inverseDocumentFrequency = new Map();
    /** Chunk id -> Map of term -> TF-IDF weight. */
    chunkVectors = new Map();
    /** Chunk id -> Euclidean norm of the chunk's weight vector. */
    chunkNorms = new Map();

    /**
     * Rebuilds the index from scratch for a project directory and writes it to
     * `defaultRagIndexPath(rootDir)`.
     *
     * @param {string} rootDir - Project root; resolved to an absolute path.
     * @param {object} [options] - Raw options, passed through `normalizeOptions`.
     * @returns {Promise<void>}
     */
    async indexProject(rootDir, options) {
        this.rootDir = path.resolve(rootDir);
        this.options = normalizeOptions(options);
        this.documents.clear();
        const files = await fg(buildPatterns(this.options.extensions), {
            cwd: this.rootDir,
            onlyFiles: true,
            dot: false,
            ignore: DEFAULT_IGNORES,
        });
        // Sorted so documents are inserted in a stable order; the search model
        // is rebuilt once after the loop instead of once per file.
        for (const file of files.sort()) {
            this.upsertDocument(path.resolve(this.rootDir, file), false);
        }
        this.rebuildSearchModel();
        this.save(defaultRagIndexPath(this.rootDir));
    }

    /**
     * Returns the best-matching chunks for a query, most similar first.
     *
     * @param {string} query - Free-text query.
     * @param {number} [k=5] - Maximum number of chunks to return.
     * @returns {object[]} Matching chunks without their scores.
     */
    search(query, k = 5) {
        return this.searchScored(query, k).map((match) => match.chunk);
    }

    /**
     * Reads a file from disk, (re)indexes it, and rebuilds the search model.
     *
     * @param {string} filePath - Absolute path, or path relative to `rootDir`.
     */
    addDocument(filePath) {
        this.upsertDocument(filePath, true);
    }

    /**
     * Drops a document from the index; the search model is rebuilt only when
     * the document was actually present.
     *
     * @param {string} filePath - Absolute path, or path relative to `rootDir`.
     */
    removeDocument(filePath) {
        const normalized = this.toStoredPath(filePath);
        if (this.documents.delete(normalized)) {
            this.rebuildSearchModel();
        }
    }

    /**
     * Summarizes the index size.
     *
     * @returns {{documents: number, chunks: number, totalTokens: number}}
     */
    getStats() {
        let chunks = 0;
        let totalTokens = 0;
        for (const document of this.documents.values()) {
            chunks += document.chunks.length;
            totalTokens += document.chunks.reduce((sum, chunk) => sum + chunk.tokens, 0);
        }
        return {
            documents: this.documents.size,
            chunks,
            totalTokens,
        };
    }

    /**
     * Writes the documents and options to a JSON file, creating parent
     * directories as needed. Derived search state is not persisted.
     *
     * @param {string} filePath - Destination file.
     */
    save(filePath) {
        const target = path.resolve(filePath);
        fs.mkdirSync(path.dirname(target), { recursive: true });
        const payload = {
            version: 1,
            rootDir: this.rootDir,
            options: this.options,
            documents: [...this.documents.values()],
        };
        fs.writeFileSync(target, JSON.stringify(payload, null, 2), 'utf8');
    }

    /**
     * Replaces the in-memory index with the contents of a file written by
     * `save`, then rebuilds the search model. Malformed document entries and
     * malformed chunks are skipped rather than rejected.
     *
     * @param {string} filePath - Index file to read.
     * @throws If the file cannot be read or is not valid JSON.
     */
    load(filePath) {
        const target = path.resolve(filePath);
        const parsed = JSON.parse(fs.readFileSync(target, 'utf8'));
        this.rootDir = typeof parsed.rootDir === 'string' ? parsed.rootDir : process.cwd();
        this.options = normalizeOptions(parsed.options);
        this.documents.clear();
        for (const document of Array.isArray(parsed.documents) ? parsed.documents : []) {
            if (!document || typeof document.path !== 'string' || typeof document.content !== 'string') {
                continue;
            }
            const chunks = Array.isArray(document.chunks) ? document.chunks.filter(isChunk) : [];
            const doc = {
                id: typeof document.id === 'string' ? document.id : hashId(document.path),
                path: document.path,
                content: document.content,
                chunks,
            };
            this.documents.set(doc.path, doc);
        }
        this.rebuildSearchModel();
    }

    /**
     * Reads a file, chunks it, and stores it under its stored path, replacing
     * any previous entry for that path.
     *
     * @param {string} filePath - Absolute path, or path relative to `rootDir`.
     * @param {boolean} rebuild - Whether to rebuild the search model afterwards.
     */
    upsertDocument(filePath, rebuild) {
        const absolute = path.isAbsolute(filePath) ? filePath : path.resolve(this.rootDir, filePath);
        const storedPath = this.toStoredPath(absolute);
        const content = fs.readFileSync(absolute, 'utf8');
        const chunks = buildChunks(storedPath, content, this.options.maxChunkTokens, this.options.overlap);
        const document = {
            id: hashId(storedPath),
            path: storedPath,
            content,
            chunks,
        };
        this.documents.set(storedPath, document);
        if (rebuild) {
            this.rebuildSearchModel();
        }
    }

    /**
     * Converts a path into the key used in `documents`: forward-slash
     * separated and relative to `rootDir` when the file lives inside it,
     * otherwise the absolute path.
     *
     * @param {string} filePath - Absolute path, or path relative to `rootDir`.
     * @returns {string} Stored path.
     */
    toStoredPath(filePath) {
        const absolute = path.isAbsolute(filePath) ? filePath : path.resolve(this.rootDir, filePath);
        const relative = path.relative(this.rootDir, absolute);
        // Only a real ".." segment means the file is outside the root. A plain
        // prefix test would also catch in-root names such as "..cache/a.md".
        const outsideRoot = relative === '..' ||
            relative.startsWith(`..${path.sep}`) ||
            path.isAbsolute(relative);
        const chosen = relative && !outsideRoot ? relative : absolute;
        return chosen.replace(/\\/g, '/').replace(/^\.\//, '');
    }

    /**
     * Regenerates all derived search state from `documents`.
     *
     * Each term in a chunk is weighted `(1 + ln(count)) * idf`, where
     * `idf = ln((1 + N) / (1 + df)) + 1`, N is the number of chunks and df is
     * the number of chunks containing the term.
     */
    rebuildSearchModel() {
        this.chunks.clear();
        this.documentFrequency.clear();
        this.inverseDocumentFrequency.clear();
        this.chunkVectors.clear();
        this.chunkNorms.clear();
        const chunkTermCounts = new Map();
        const chunkList = [...this.documents.values()].flatMap((document) => document.chunks);
        for (const chunk of chunkList) {
            this.chunks.set(chunk.id, chunk);
            const counts = countTerms(chunk.text);
            // Chunks without any terms stay listed but get no vector, so they
            // can never match a query.
            if (!counts.size) {
                continue;
            }
            chunkTermCounts.set(chunk.id, counts);
            for (const term of counts.keys()) {
                this.documentFrequency.set(term, (this.documentFrequency.get(term) ?? 0) + 1);
            }
        }
        const totalChunks = Math.max(chunkList.length, 1);
        for (const [term, frequency] of this.documentFrequency) {
            this.inverseDocumentFrequency.set(term, Math.log((1 + totalChunks) / (1 + frequency)) + 1);
        }
        for (const [chunkId, counts] of chunkTermCounts) {
            const vector = new Map();
            let sumSquares = 0;
            for (const [term, count] of counts) {
                const weight = (1 + Math.log(count)) * (this.inverseDocumentFrequency.get(term) ?? 1);
                vector.set(term, weight);
                sumSquares += weight * weight;
            }
            this.chunkVectors.set(chunkId, vector);
            this.chunkNorms.set(chunkId, Math.sqrt(sumSquares));
        }
    }

    /**
     * Ranks chunks by cosine similarity between the query's TF-IDF vector and
     * each chunk's vector.
     *
     * @param {string} query - Free-text query.
     * @param {number} k - Maximum number of matches; values below 1 are treated as 1.
     * @returns {{chunk: object, score: number}[]} Matches with a positive score, best first.
     */
    searchScored(query, k) {
        const queryCounts = countTerms(query);
        if (!queryCounts.size || !this.chunks.size) {
            return [];
        }
        const totalChunks = Math.max(this.chunks.size, 1);
        const queryVector = new Map();
        let querySumSquares = 0;
        for (const [term, count] of queryCounts) {
            // Terms absent from the index get the IDF of a term with df = 0.
            const idf = this.inverseDocumentFrequency.get(term) ?? Math.log((1 + totalChunks) / 1) + 1;
            const weight = (1 + Math.log(count)) * idf;
            queryVector.set(term, weight);
            querySumSquares += weight * weight;
        }
        const queryNorm = Math.sqrt(querySumSquares);
        if (!queryNorm) {
            return [];
        }
        return [...this.chunks.values()]
            .map((chunk) => {
                const vector = this.chunkVectors.get(chunk.id);
                const chunkNorm = this.chunkNorms.get(chunk.id) ?? 0;
                if (!vector || !chunkNorm) {
                    return null;
                }
                let dot = 0;
                for (const [term, weight] of queryVector) {
                    dot += weight * (vector.get(term) ?? 0);
                }
                const score = dot / (queryNorm * chunkNorm);
                return score > 0 ? { chunk, score } : null;
            })
            .filter((match) => Boolean(match))
            .sort((left, right) => right.score - left.score)
            .slice(0, Math.max(1, k));
    }
}
226
/**
 * Fills in defaults and sanitizes raw indexing options.
 *
 * @param {object} [options] - Raw options; may be undefined.
 * @param {string[]} [options.extensions] - Extensions with or without a leading dot.
 *   An empty or non-array value selects `DEFAULT_EXTENSIONS`.
 * @param {number} [options.maxChunkTokens] - Chunk token budget. Rounded down;
 *   anything that does not round down to at least 1 selects `DEFAULT_MAX_CHUNK_TOKENS`.
 * @param {number} [options.overlap] - Overlap in tokens. Rounded down; negative
 *   or non-numeric values select `DEFAULT_OVERLAP`.
 * @returns {{extensions: string[], maxChunkTokens: number, overlap: number}}
 */
function normalizeOptions(options) {
    const requested = options?.extensions;
    const extensions = Array.isArray(requested) && requested.length
        ? requested.map(normalizeExtension)
        : [...DEFAULT_EXTENSIONS];
    // Floor before validating: a fractional budget such as 0.5 is positive but
    // would otherwise round down to a zero-token budget.
    const flooredBudget = typeof options?.maxChunkTokens === 'number'
        ? Math.floor(options.maxChunkTokens)
        : Number.NaN;
    return {
        // De-duplicate while keeping first-seen order.
        extensions: [...new Set(extensions)],
        maxChunkTokens: flooredBudget >= 1 ? flooredBudget : DEFAULT_MAX_CHUNK_TOKENS,
        overlap: typeof options?.overlap === 'number' && options.overlap >= 0
            ? Math.floor(options.overlap)
            : DEFAULT_OVERLAP,
    };
}
240
/**
 * Lower-cases an extension and guarantees a single leading dot.
 *
 * @param {string} extension - Extension such as `"TS"` or `".md"`.
 * @returns {string} Normalized extension such as `".ts"`.
 */
function normalizeExtension(extension) {
    const hasDot = extension.startsWith('.');
    const lowered = extension.toLowerCase();
    if (hasDot) {
        return lowered;
    }
    return `.${lowered}`;
}
243
/**
 * Turns extensions into recursive glob patterns.
 *
 * @param {string[]} extensions - Extensions including the leading dot.
 * @returns {string[]} One `**` + `/*<extension>` pattern per input, in the same order.
 */
function buildPatterns(extensions) {
    const patterns = [];
    for (const extension of extensions) {
        patterns.push(`**/*${extension}`);
    }
    return patterns;
}
246
/**
 * Splits a file's content into indexable chunks.
 *
 * The content is segmented, oversized segments are broken up, and the result
 * is packed into groups; each group becomes one chunk.
 *
 * @param {string} file - Stored path of the file; used for ids and metadata.
 * @param {string} content - Full file content.
 * @param {number} maxTokens - Token budget per chunk.
 * @param {number} overlapTokens - Tokens shared between consecutive chunks.
 * @returns {object[]} Chunks shaped `{ id, text, tokens, metadata: { file, startLine, endLine } }`.
 */
function buildChunks(file, content, maxTokens, overlapTokens) {
    const rawSegments = splitIntoSegments(file, content);
    const boundedSegments = splitOversizedSegments(rawSegments, maxTokens);
    const groups = packSegments(boundedSegments, maxTokens, overlapTokens);
    const chunks = [];
    groups.forEach((group, position) => {
        const texts = [];
        let tokens = 0;
        for (const part of group) {
            texts.push(part.text);
            tokens += part.tokens;
        }
        const first = group[0];
        const last = group[group.length - 1];
        chunks.push({
            // Chunk ids are the file hash plus the chunk's position in the file.
            id: `${hashId(file)}:${position}`,
            text: texts.join('\n\n').trim(),
            tokens,
            metadata: {
                file,
                startLine: first?.startLine ?? 1,
                endLine: last?.endLine ?? 1,
            },
        });
    });
    return chunks;
}
262
/**
 * Splits file content into trimmed, token-counted segments.
 *
 * Files whose extension is in `CODE_EXTENSIONS` are split on declaration
 * boundaries; everything else is split into paragraphs.
 *
 * @param {string} file - File path; only its extension is inspected.
 * @param {string} content - Full file content (LF or CRLF line endings).
 * @returns {object[]} Non-empty segments `{ text, startLine, endLine, tokens }`.
 */
function splitIntoSegments(file, content) {
    const lines = content.split(/\r?\n/);
    const isCode = CODE_EXTENSIONS.has(path.extname(file).toLowerCase());
    const rawSegments = isCode ? splitCodeSegments(lines) : splitParagraphSegments(lines);
    const segments = [];
    for (const raw of rawSegments) {
        const text = raw.text.trim();
        const tokens = countTokensSync(text);
        if (text.length > 0) {
            segments.push({ ...raw, text, tokens });
        }
    }
    return segments;
}
276
/**
 * Splits source lines into segments that each start at a line matching
 * `CODE_BOUNDARY_RE`. Lines before the first boundary form their own segment.
 * When no line matches, the lines are split into paragraphs instead.
 *
 * @param {string[]} lines - File content, one entry per line.
 * @returns {object[]} Segments `{ text, startLine, endLine }` in file order.
 */
function splitCodeSegments(lines) {
    const boundaryIndexes = [];
    for (let i = 0; i < lines.length; i += 1) {
        if (CODE_BOUNDARY_RE.test(lines[i])) {
            boundaryIndexes.push(i);
        }
    }
    if (boundaryIndexes.length === 0) {
        return splitParagraphSegments(lines);
    }
    // Segment starts: the preamble (if any) followed by every boundary line.
    const starts = boundaryIndexes[0] > 0 ? [0, ...boundaryIndexes] : boundaryIndexes;
    const segments = [];
    starts.forEach((from, position) => {
        const nextStart = position + 1 < starts.length ? starts[position + 1] : lines.length;
        pushSegment(segments, lines, from, nextStart - 1);
    });
    return segments;
}
298
/**
 * Splits lines into paragraphs. A paragraph ends at a blank line or just
 * before a Markdown heading (`#` to `######` followed by whitespace).
 *
 * @param {string[]} lines - File content, one entry per line.
 * @returns {object[]} Segments `{ text, startLine, endLine }` in file order.
 */
function splitParagraphSegments(lines) {
    const segments = [];
    // Index of the first line of the paragraph being collected, or -1 if none.
    let openAt = -1;
    const closeParagraph = (lastIndex) => {
        if (openAt >= 0) {
            pushSegment(segments, lines, openAt, lastIndex);
            openAt = -1;
        }
    };
    for (let i = 0; i < lines.length; i += 1) {
        const content = (lines[i] ?? '').trim();
        if (content.length === 0) {
            closeParagraph(i - 1);
            continue;
        }
        // A heading closes whatever came before and opens a new paragraph.
        if (/^#{1,6}\s/.test(content)) {
            closeParagraph(i - 1);
        }
        if (openAt < 0) {
            openAt = i;
        }
    }
    closeParagraph(lines.length - 1);
    return segments;
}
326
/**
 * Breaks segments that exceed the token budget into line-aligned pieces.
 *
 * Segments within budget pass through untouched. An oversized segment is
 * split greedily: lines are accumulated until adding the next line would
 * exceed `maxTokens`, then the accumulated lines are emitted as one piece.
 * A single line that is itself over budget is emitted as its own piece.
 * Pieces consisting only of blank lines are dropped, but still advance the
 * line counter so later pieces keep correct line numbers.
 *
 * @param {object[]} segments - Segments `{ text, tokens, startLine, endLine }`.
 * @param {number} maxTokens - Token budget per segment.
 * @returns {object[]} Segments in the original order, none with empty text
 *   unless it was passed in that way.
 */
function splitOversizedSegments(segments, maxTokens) {
    const result = [];
    for (const segment of segments) {
        if (segment.tokens <= maxTokens) {
            result.push(segment);
            continue;
        }
        let startLine = segment.startLine;
        let buffer = [];
        // Emits the buffered lines as one piece and starts a new buffer.
        const flush = () => {
            const text = buffer.join('\n').trim();
            // Skip whitespace-only pieces: they would become empty chunks.
            if (text.length > 0) {
                result.push({
                    text,
                    tokens: countTokensSync(text),
                    startLine,
                    endLine: startLine + buffer.length - 1,
                });
            }
            startLine += buffer.length;
            buffer = [];
        };
        for (const line of segment.text.split('\n')) {
            if (buffer.length > 0) {
                const candidate = [...buffer, line].join('\n').trim();
                if (countTokensSync(candidate) > maxTokens) {
                    flush();
                }
            }
            buffer.push(line);
        }
        if (buffer.length > 0) {
            flush();
        }
    }
    return result;
}
367
/**
 * Packs consecutive segments into groups that stay within the token budget.
 *
 * When the next segment would overflow the current group, the group is
 * emitted and the next group is seeded with the overlap taken from its tail.
 * The seed is then trimmed from the front until the pending segment fits, so
 * a segment is always placed even if no overlap can be kept.
 *
 * @param {object[]} segments - Segments carrying a numeric `tokens` field.
 * @param {number} maxTokens - Token budget per group.
 * @param {number} overlapTokens - Target overlap between consecutive groups.
 * @returns {object[][]} Groups of segments in input order.
 */
function packSegments(segments, maxTokens, overlapTokens) {
    const groups = [];
    let current = [];
    let currentTokens = 0;
    for (const segment of segments) {
        const overflows = current.length > 0 && currentTokens + segment.tokens > maxTokens;
        if (overflows) {
            groups.push(current);
            current = buildOverlap(current, overlapTokens);
            currentTokens = current.reduce((sum, item) => sum + item.tokens, 0);
            // Drop overlap from the front until the pending segment fits.
            while (current.length > 0 && currentTokens + segment.tokens > maxTokens) {
                currentTokens -= current.shift().tokens;
            }
        }
        current.push(segment);
        currentTokens += segment.tokens;
    }
    if (current.length > 0) {
        groups.push(current);
    }
    return groups;
}
397
/**
 * Selects the trailing segments of a group to repeat at the start of the next
 * group. Segments are taken from the end until their combined tokens reach
 * `overlapTokens`; whole segments are kept, so the result may exceed it.
 *
 * @param {object[]} segments - Group that was just emitted.
 * @param {number} overlapTokens - Target overlap; zero or less disables overlap.
 * @returns {object[]} New array holding the selected tail, in original order.
 */
function buildOverlap(segments, overlapTokens) {
    if (overlapTokens <= 0) {
        return [];
    }
    let cut = segments.length;
    let collected = 0;
    while (cut > 0) {
        cut -= 1;
        collected += segments[cut].tokens;
        if (collected >= overlapTokens) {
            break;
        }
    }
    return segments.slice(cut);
}
411
/**
 * Appends the lines `startIndex..endIndex` (inclusive, 0-based) to `segments`
 * as one trimmed segment with 1-based line numbers.
 *
 * The reported range is tightened to the first and last non-blank line, so
 * `startLine`/`endLine` describe exactly the lines present in `text`. Nothing
 * is appended when the range is empty or contains only whitespace.
 *
 * @param {object[]} segments - Output array; mutated in place.
 * @param {string[]} lines - All lines of the file.
 * @param {number} startIndex - First line of the range (0-based).
 * @param {number} endIndex - Last line of the range (0-based).
 */
function pushSegment(segments, lines, startIndex, endIndex) {
    if (startIndex > endIndex) {
        return;
    }
    const isBlank = (index) => (lines[index] ?? '').trim().length === 0;
    let first = startIndex;
    let last = endIndex;
    // Trimming the joined text drops blank edge lines; keep the line numbers
    // in step so callers can map text lines back to file lines.
    while (first <= last && isBlank(first)) {
        first += 1;
    }
    while (last >= first && isBlank(last)) {
        last -= 1;
    }
    if (first > last) {
        return;
    }
    const text = lines
        .slice(first, last + 1)
        .join('\n')
        .trim();
    if (!text) {
        return;
    }
    segments.push({
        text,
        startLine: first + 1,
        endLine: last + 1,
    });
}
426
/**
 * Tokenizes text and counts how often each term occurs.
 *
 * camelCase boundaries (lower-case letter or digit followed by an upper-case
 * letter) are split first; the text is then lower-cased and every run of
 * `a-z`, `0-9` or `_` becomes a term.
 *
 * @param {string} text - Text to tokenize.
 * @returns {Map<string, number>} Term -> occurrence count, in first-seen order.
 */
function countTerms(text) {
    const spaced = text.replace(/([a-z0-9])([A-Z])/g, '$1 $2');
    const tokens = spaced.toLowerCase().match(/[a-z0-9_]+/g);
    const tally = new Map();
    if (tokens === null) {
        return tally;
    }
    for (const token of tokens) {
        tally.set(token, (tally.get(token) ?? 0) + 1);
    }
    return tally;
}
437
/**
 * Derives a stable identifier from a string.
 *
 * @param {string} value - Value to hash.
 * @returns {string} SHA-1 digest of `value` as lower-case hex.
 */
function hashId(value) {
    const hasher = crypto.createHash('sha1');
    hasher.update(value);
    return hasher.digest('hex');
}
440
/**
 * Checks whether a value has the shape of a stored chunk:
 * string `id` and `text`, numeric `tokens`, and a `metadata` object with a
 * string `file` and numeric `startLine` / `endLine`.
 *
 * @param {unknown} value - Value to inspect.
 * @returns {boolean} True when every required field has the expected type.
 */
function isChunk(value) {
    if (typeof value !== 'object' || value === null) {
        return false;
    }
    const { id, text, tokens, metadata } = value;
    if (typeof id !== 'string' || typeof text !== 'string' || typeof tokens !== 'number') {
        return false;
    }
    return typeof metadata?.file === 'string' &&
        typeof metadata?.startLine === 'number' &&
        typeof metadata?.endLine === 'number';
}