claude-eidetic 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +333 -0
- package/dist/config.d.ts +25 -0
- package/dist/config.js +29 -10
- package/dist/core/cleanup.d.ts +8 -0
- package/dist/core/cleanup.js +41 -0
- package/dist/core/doc-indexer.d.ts +13 -0
- package/dist/core/doc-indexer.js +76 -0
- package/dist/core/doc-searcher.d.ts +13 -0
- package/dist/core/doc-searcher.js +65 -0
- package/dist/core/file-category.d.ts +7 -0
- package/dist/core/file-category.js +75 -0
- package/dist/core/indexer.js +12 -4
- package/dist/core/preview.d.ts +1 -2
- package/dist/core/preview.js +2 -5
- package/dist/core/repo-map.d.ts +33 -0
- package/dist/core/repo-map.js +144 -0
- package/dist/core/searcher.d.ts +1 -13
- package/dist/core/searcher.js +20 -24
- package/dist/core/snapshot-io.js +2 -2
- package/dist/core/sync.d.ts +5 -25
- package/dist/core/sync.js +90 -65
- package/dist/core/targeted-indexer.d.ts +19 -0
- package/dist/core/targeted-indexer.js +127 -0
- package/dist/embedding/factory.d.ts +0 -13
- package/dist/embedding/factory.js +0 -17
- package/dist/embedding/openai.d.ts +2 -14
- package/dist/embedding/openai.js +7 -20
- package/dist/errors.d.ts +2 -0
- package/dist/errors.js +2 -0
- package/dist/format.d.ts +12 -0
- package/dist/format.js +160 -31
- package/dist/hooks/post-tool-use.d.ts +13 -0
- package/dist/hooks/post-tool-use.js +113 -0
- package/dist/hooks/stop-hook.d.ts +11 -0
- package/dist/hooks/stop-hook.js +121 -0
- package/dist/hooks/targeted-runner.d.ts +11 -0
- package/dist/hooks/targeted-runner.js +66 -0
- package/dist/index.js +68 -9
- package/dist/infra/qdrant-bootstrap.js +14 -12
- package/dist/memory/history.d.ts +19 -0
- package/dist/memory/history.js +40 -0
- package/dist/memory/llm.d.ts +2 -0
- package/dist/memory/llm.js +56 -0
- package/dist/memory/prompts.d.ts +5 -0
- package/dist/memory/prompts.js +36 -0
- package/dist/memory/reconciler.d.ts +12 -0
- package/dist/memory/reconciler.js +36 -0
- package/dist/memory/store.d.ts +20 -0
- package/dist/memory/store.js +206 -0
- package/dist/memory/types.d.ts +28 -0
- package/dist/memory/types.js +2 -0
- package/dist/paths.d.ts +3 -4
- package/dist/paths.js +14 -4
- package/dist/precompact/hook.d.ts +9 -0
- package/dist/precompact/hook.js +170 -0
- package/dist/precompact/index-runner.d.ts +9 -0
- package/dist/precompact/index-runner.js +52 -0
- package/dist/precompact/note-writer.d.ts +15 -0
- package/dist/precompact/note-writer.js +109 -0
- package/dist/precompact/session-indexer.d.ts +13 -0
- package/dist/precompact/session-indexer.js +31 -0
- package/dist/precompact/tier0-inject.d.ts +16 -0
- package/dist/precompact/tier0-inject.js +88 -0
- package/dist/precompact/tier0-writer.d.ts +16 -0
- package/dist/precompact/tier0-writer.js +74 -0
- package/dist/precompact/transcript-parser.d.ts +10 -0
- package/dist/precompact/transcript-parser.js +148 -0
- package/dist/precompact/types.d.ts +93 -0
- package/dist/precompact/types.js +5 -0
- package/dist/precompact/utils.d.ts +29 -0
- package/dist/precompact/utils.js +95 -0
- package/dist/setup-message.d.ts +2 -2
- package/dist/setup-message.js +39 -20
- package/dist/splitter/ast.js +84 -22
- package/dist/splitter/line.d.ts +0 -4
- package/dist/splitter/line.js +1 -7
- package/dist/splitter/symbol-extract.d.ts +16 -0
- package/dist/splitter/symbol-extract.js +61 -0
- package/dist/splitter/types.d.ts +5 -0
- package/dist/splitter/types.js +1 -1
- package/dist/state/doc-metadata.d.ts +18 -0
- package/dist/state/doc-metadata.js +59 -0
- package/dist/state/registry.d.ts +1 -3
- package/dist/state/snapshot.d.ts +0 -1
- package/dist/state/snapshot.js +3 -19
- package/dist/tool-schemas.d.ts +251 -1
- package/dist/tool-schemas.js +307 -0
- package/dist/tools.d.ts +69 -0
- package/dist/tools.js +286 -17
- package/dist/vectordb/milvus.d.ts +7 -5
- package/dist/vectordb/milvus.js +116 -19
- package/dist/vectordb/qdrant.d.ts +8 -10
- package/dist/vectordb/qdrant.js +105 -33
- package/dist/vectordb/types.d.ts +20 -0
- package/messages.yaml +50 -0
- package/package.json +31 -6
package/dist/core/sync.js
CHANGED
|
@@ -3,34 +3,60 @@ import path from 'node:path';
|
|
|
3
3
|
import { createHash } from 'node:crypto';
|
|
4
4
|
import { glob } from 'glob';
|
|
5
5
|
const DEFAULT_EXTENSIONS = new Set([
|
|
6
|
-
'.ts',
|
|
7
|
-
'.
|
|
6
|
+
'.ts',
|
|
7
|
+
'.tsx',
|
|
8
|
+
'.js',
|
|
9
|
+
'.jsx',
|
|
10
|
+
'.mjs',
|
|
11
|
+
'.cjs',
|
|
12
|
+
'.py',
|
|
13
|
+
'.pyi',
|
|
8
14
|
'.go',
|
|
9
15
|
'.java',
|
|
10
16
|
'.rs',
|
|
11
|
-
'.cpp',
|
|
17
|
+
'.cpp',
|
|
18
|
+
'.cc',
|
|
19
|
+
'.cxx',
|
|
20
|
+
'.c',
|
|
21
|
+
'.h',
|
|
22
|
+
'.hpp',
|
|
12
23
|
'.cs',
|
|
13
24
|
'.scala',
|
|
14
25
|
'.rb',
|
|
15
26
|
'.php',
|
|
16
27
|
'.swift',
|
|
17
|
-
'.kt',
|
|
28
|
+
'.kt',
|
|
29
|
+
'.kts',
|
|
18
30
|
'.lua',
|
|
19
|
-
'.sh',
|
|
31
|
+
'.sh',
|
|
32
|
+
'.bash',
|
|
33
|
+
'.zsh',
|
|
20
34
|
'.sql',
|
|
21
|
-
'.r',
|
|
22
|
-
'.
|
|
35
|
+
'.r',
|
|
36
|
+
'.R',
|
|
37
|
+
'.m',
|
|
38
|
+
'.mm', // Objective-C
|
|
23
39
|
'.dart',
|
|
24
|
-
'.ex',
|
|
25
|
-
'.
|
|
40
|
+
'.ex',
|
|
41
|
+
'.exs', // Elixir
|
|
42
|
+
'.erl',
|
|
43
|
+
'.hrl', // Erlang
|
|
26
44
|
'.hs', // Haskell
|
|
27
|
-
'.ml',
|
|
28
|
-
'.
|
|
29
|
-
'.
|
|
45
|
+
'.ml',
|
|
46
|
+
'.mli', // OCaml
|
|
47
|
+
'.vue',
|
|
48
|
+
'.svelte',
|
|
49
|
+
'.astro',
|
|
50
|
+
'.yaml',
|
|
51
|
+
'.yml',
|
|
30
52
|
'.toml',
|
|
31
53
|
'.json',
|
|
32
|
-
'.md',
|
|
33
|
-
'.
|
|
54
|
+
'.md',
|
|
55
|
+
'.mdx',
|
|
56
|
+
'.html',
|
|
57
|
+
'.css',
|
|
58
|
+
'.scss',
|
|
59
|
+
'.less',
|
|
34
60
|
]);
|
|
35
61
|
const DEFAULT_IGNORE = [
|
|
36
62
|
'**/node_modules/**',
|
|
@@ -51,13 +77,8 @@ const DEFAULT_IGNORE = [
|
|
|
51
77
|
'**/pnpm-lock.yaml',
|
|
52
78
|
'**/yarn.lock',
|
|
53
79
|
];
|
|
54
|
-
/**
|
|
55
|
-
* Scan a directory and return relative paths of indexable files.
|
|
56
|
-
* Respects .gitignore if present.
|
|
57
|
-
*/
|
|
58
80
|
export async function scanFiles(rootPath, customExtensions = [], customIgnore = []) {
|
|
59
81
|
const extensions = new Set([...DEFAULT_EXTENSIONS, ...customExtensions]);
|
|
60
|
-
// Read .gitignore patterns if present
|
|
61
82
|
const gitignorePatterns = readGitignore(rootPath);
|
|
62
83
|
const allIgnore = [...DEFAULT_IGNORE, ...gitignorePatterns, ...customIgnore];
|
|
63
84
|
const files = await glob('**/*', {
|
|
@@ -67,22 +88,12 @@ export async function scanFiles(rootPath, customExtensions = [], customIgnore =
|
|
|
67
88
|
ignore: allIgnore,
|
|
68
89
|
absolute: false,
|
|
69
90
|
});
|
|
70
|
-
return files
|
|
71
|
-
.filter(f => extensions.has(path.extname(f).toLowerCase()))
|
|
72
|
-
.sort();
|
|
91
|
+
return files.filter((f) => extensions.has(path.extname(f).toLowerCase())).sort();
|
|
73
92
|
}
|
|
74
|
-
/**
|
|
75
|
-
* Compute a truncated SHA-256 hash of a file's contents.
|
|
76
|
-
* 16 hex chars (64 bits) is sufficient for change detection — collisions
|
|
77
|
-
* would only cause a redundant re-index, not data loss.
|
|
78
|
-
*/
|
|
79
93
|
function hashFileContent(fullPath) {
|
|
80
94
|
const content = fs.readFileSync(fullPath);
|
|
81
95
|
return createHash('sha256').update(content).digest('hex').slice(0, 16);
|
|
82
96
|
}
|
|
83
|
-
/**
|
|
84
|
-
* Build a size+contentHash snapshot for a list of files.
|
|
85
|
-
*/
|
|
86
97
|
export function buildSnapshot(rootPath, relativePaths) {
|
|
87
98
|
const snapshot = {};
|
|
88
99
|
for (const rel of relativePaths) {
|
|
@@ -92,16 +103,11 @@ export function buildSnapshot(rootPath, relativePaths) {
|
|
|
92
103
|
snapshot[rel] = { contentHash };
|
|
93
104
|
}
|
|
94
105
|
catch (err) {
|
|
95
|
-
console.warn(`Skipping "${rel}"
|
|
106
|
+
console.warn(`Skipping "${rel}":`, err);
|
|
96
107
|
}
|
|
97
108
|
}
|
|
98
109
|
return snapshot;
|
|
99
110
|
}
|
|
100
|
-
/**
|
|
101
|
-
* Compare current snapshot to a previous one. Returns added, modified, and removed files.
|
|
102
|
-
* Uses content hash as the authoritative change signal — immune to git ops, IDE formatters,
|
|
103
|
-
* NFS clock skew, and other mtime-only pitfalls.
|
|
104
|
-
*/
|
|
105
111
|
export function diffSnapshots(previous, current) {
|
|
106
112
|
const added = [];
|
|
107
113
|
const modified = [];
|
|
@@ -122,31 +128,23 @@ export function diffSnapshots(previous, current) {
|
|
|
122
128
|
}
|
|
123
129
|
return { added, modified, removed };
|
|
124
130
|
}
|
|
125
|
-
/**
|
|
126
|
-
* Parse .gitignore content into glob patterns.
|
|
127
|
-
* Pure function — no filesystem access.
|
|
128
|
-
*/
|
|
129
131
|
export function parseGitignorePatterns(content) {
|
|
130
132
|
return content
|
|
131
133
|
.split('\n')
|
|
132
|
-
.map(line => line.trim())
|
|
133
|
-
.filter(line => line && !line.startsWith('#') && !line.startsWith('!'))
|
|
134
|
-
.map(pattern => {
|
|
135
|
-
// Strip trailing spaces (gitignore spec)
|
|
134
|
+
.map((line) => line.trim())
|
|
135
|
+
.filter((line) => line && !line.startsWith('#') && !line.startsWith('!'))
|
|
136
|
+
.map((pattern) => {
|
|
136
137
|
pattern = pattern.replace(/\s+$/, '');
|
|
137
|
-
// Directory-only patterns: trailing /
|
|
138
138
|
if (pattern.endsWith('/')) {
|
|
139
139
|
pattern = pattern.slice(0, -1);
|
|
140
140
|
}
|
|
141
|
-
// Rooted patterns: leading /
|
|
142
141
|
if (pattern.startsWith('/'))
|
|
143
142
|
return pattern.slice(1);
|
|
144
|
-
// Unrooted patterns without / match anywhere
|
|
145
143
|
if (!pattern.includes('/'))
|
|
146
144
|
return `**/${pattern}`;
|
|
147
145
|
return pattern;
|
|
148
146
|
})
|
|
149
|
-
.filter(p => p.length > 0);
|
|
147
|
+
.filter((p) => p.length > 0);
|
|
150
148
|
}
|
|
151
149
|
function readGitignore(rootPath) {
|
|
152
150
|
const gitignorePath = path.join(rootPath, '.gitignore');
|
|
@@ -158,30 +156,57 @@ function readGitignore(rootPath) {
|
|
|
158
156
|
return [];
|
|
159
157
|
}
|
|
160
158
|
}
|
|
161
|
-
/**
|
|
162
|
-
* Map file extension to language name for the splitter.
|
|
163
|
-
*/
|
|
164
159
|
export function extensionToLanguage(ext) {
|
|
165
160
|
const map = {
|
|
166
|
-
'.ts': 'typescript',
|
|
167
|
-
'.
|
|
168
|
-
'.
|
|
161
|
+
'.ts': 'typescript',
|
|
162
|
+
'.tsx': 'tsx',
|
|
163
|
+
'.js': 'javascript',
|
|
164
|
+
'.jsx': 'javascript',
|
|
165
|
+
'.mjs': 'javascript',
|
|
166
|
+
'.cjs': 'javascript',
|
|
167
|
+
'.py': 'python',
|
|
168
|
+
'.pyi': 'python',
|
|
169
169
|
'.go': 'go',
|
|
170
170
|
'.java': 'java',
|
|
171
171
|
'.rs': 'rust',
|
|
172
|
-
'.cpp': 'cpp',
|
|
172
|
+
'.cpp': 'cpp',
|
|
173
|
+
'.cc': 'cpp',
|
|
174
|
+
'.cxx': 'cpp',
|
|
175
|
+
'.c': 'c',
|
|
176
|
+
'.h': 'cpp',
|
|
177
|
+
'.hpp': 'cpp',
|
|
173
178
|
'.cs': 'csharp',
|
|
174
179
|
'.scala': 'scala',
|
|
175
|
-
'.rb': 'ruby',
|
|
176
|
-
'.
|
|
177
|
-
'.
|
|
178
|
-
'.
|
|
179
|
-
'.
|
|
180
|
-
'.
|
|
181
|
-
'.
|
|
182
|
-
'.
|
|
183
|
-
'.
|
|
184
|
-
'.
|
|
180
|
+
'.rb': 'ruby',
|
|
181
|
+
'.php': 'php',
|
|
182
|
+
'.swift': 'swift',
|
|
183
|
+
'.kt': 'kotlin',
|
|
184
|
+
'.kts': 'kotlin',
|
|
185
|
+
'.lua': 'lua',
|
|
186
|
+
'.sh': 'bash',
|
|
187
|
+
'.bash': 'bash',
|
|
188
|
+
'.zsh': 'bash',
|
|
189
|
+
'.sql': 'sql',
|
|
190
|
+
'.r': 'r',
|
|
191
|
+
'.R': 'r',
|
|
192
|
+
'.dart': 'dart',
|
|
193
|
+
'.ex': 'elixir',
|
|
194
|
+
'.exs': 'elixir',
|
|
195
|
+
'.hs': 'haskell',
|
|
196
|
+
'.ml': 'ocaml',
|
|
197
|
+
'.vue': 'vue',
|
|
198
|
+
'.svelte': 'svelte',
|
|
199
|
+
'.astro': 'astro',
|
|
200
|
+
'.yaml': 'yaml',
|
|
201
|
+
'.yml': 'yaml',
|
|
202
|
+
'.toml': 'toml',
|
|
203
|
+
'.json': 'json',
|
|
204
|
+
'.md': 'markdown',
|
|
205
|
+
'.mdx': 'markdown',
|
|
206
|
+
'.html': 'html',
|
|
207
|
+
'.css': 'css',
|
|
208
|
+
'.scss': 'scss',
|
|
209
|
+
'.less': 'less',
|
|
185
210
|
};
|
|
186
211
|
return map[ext.toLowerCase()] ?? 'unknown';
|
|
187
212
|
}
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type { Embedding } from '../embedding/types.js';
|
|
2
|
+
import type { VectorDB } from '../vectordb/types.js';
|
|
3
|
+
export interface TargetedIndexResult {
|
|
4
|
+
processedFiles: number;
|
|
5
|
+
totalChunks: number;
|
|
6
|
+
skippedFiles: number;
|
|
7
|
+
durationMs: number;
|
|
8
|
+
}
|
|
9
|
+
/**
|
|
10
|
+
* Re-index a specific set of files within a project.
|
|
11
|
+
* For each file: delete stale vectors, re-split, re-embed, re-insert, update snapshot.
|
|
12
|
+
*
|
|
13
|
+
* @param rootPath Absolute path to the project root
|
|
14
|
+
* @param relativePaths Relative paths (from rootPath) of files to re-index
|
|
15
|
+
* @param embedding Embedding provider
|
|
16
|
+
* @param vectordb Vector DB provider
|
|
17
|
+
*/
|
|
18
|
+
export declare function indexFiles(rootPath: string, relativePaths: string[], embedding: Embedding, vectordb: VectorDB): Promise<TargetedIndexResult>;
|
|
19
|
+
//# sourceMappingURL=targeted-indexer.d.ts.map
|
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
import fs from 'node:fs';
|
|
2
|
+
import path from 'node:path';
|
|
3
|
+
import { randomUUID } from 'node:crypto';
|
|
4
|
+
import { AstSplitter } from '../splitter/ast.js';
|
|
5
|
+
import { LineSplitter } from '../splitter/line.js';
|
|
6
|
+
import { extensionToLanguage, buildSnapshot } from './sync.js';
|
|
7
|
+
import { getConfig } from '../config.js';
|
|
8
|
+
import { normalizePath, pathToCollectionName } from '../paths.js';
|
|
9
|
+
import { classifyFileCategory } from './file-category.js';
|
|
10
|
+
import { loadSnapshot, saveSnapshot } from './snapshot-io.js';
|
|
11
|
+
/**
|
|
12
|
+
* Re-index a specific set of files within a project.
|
|
13
|
+
* For each file: delete stale vectors, re-split, re-embed, re-insert, update snapshot.
|
|
14
|
+
*
|
|
15
|
+
* @param rootPath Absolute path to the project root
|
|
16
|
+
* @param relativePaths Relative paths (from rootPath) of files to re-index
|
|
17
|
+
* @param embedding Embedding provider
|
|
18
|
+
* @param vectordb Vector DB provider
|
|
19
|
+
*/
|
|
20
|
+
export async function indexFiles(rootPath, relativePaths, embedding, vectordb) {
|
|
21
|
+
const start = Date.now();
|
|
22
|
+
const normalizedRoot = normalizePath(rootPath);
|
|
23
|
+
const collectionName = pathToCollectionName(normalizedRoot);
|
|
24
|
+
const config = getConfig();
|
|
25
|
+
// Skip if the collection doesn't exist (codebase never indexed)
|
|
26
|
+
if (!(await vectordb.hasCollection(collectionName))) {
|
|
27
|
+
process.stderr.write(`[targeted-indexer] No collection for ${normalizedRoot} — codebase not indexed, skipping.\n`);
|
|
28
|
+
return {
|
|
29
|
+
processedFiles: 0,
|
|
30
|
+
totalChunks: 0,
|
|
31
|
+
skippedFiles: relativePaths.length,
|
|
32
|
+
durationMs: Date.now() - start,
|
|
33
|
+
};
|
|
34
|
+
}
|
|
35
|
+
const astSplitter = new AstSplitter();
|
|
36
|
+
const lineSplitter = new LineSplitter();
|
|
37
|
+
const allChunks = [];
|
|
38
|
+
const processedPaths = [];
|
|
39
|
+
const deletedPaths = [];
|
|
40
|
+
let skippedFiles = 0;
|
|
41
|
+
// Step 1: delete stale vectors and split files
|
|
42
|
+
const concurrency = config.indexingConcurrency;
|
|
43
|
+
for (let i = 0; i < relativePaths.length; i += concurrency) {
|
|
44
|
+
const batch = relativePaths.slice(i, i + concurrency);
|
|
45
|
+
const batchResults = await Promise.all(batch.map(async (relPath) => {
|
|
46
|
+
// Always remove stale vectors first
|
|
47
|
+
await vectordb.deleteByPath(collectionName, relPath);
|
|
48
|
+
const fullPath = path.join(normalizedRoot, relPath);
|
|
49
|
+
let code;
|
|
50
|
+
try {
|
|
51
|
+
code = fs.readFileSync(fullPath, 'utf-8');
|
|
52
|
+
}
|
|
53
|
+
catch (err) {
|
|
54
|
+
if (err.code === 'ENOENT') {
|
|
55
|
+
// File was deleted — vectors removed, skip re-embedding
|
|
56
|
+
return { relPath, chunks: [], deleted: true };
|
|
57
|
+
}
|
|
58
|
+
throw err;
|
|
59
|
+
}
|
|
60
|
+
if (code.trim().length === 0)
|
|
61
|
+
return { relPath, chunks: [], deleted: false };
|
|
62
|
+
const ext = path.extname(relPath);
|
|
63
|
+
const language = extensionToLanguage(ext);
|
|
64
|
+
let chunks = astSplitter.split(code, language, relPath);
|
|
65
|
+
if (chunks.length === 0) {
|
|
66
|
+
chunks = lineSplitter.split(code, language, relPath);
|
|
67
|
+
}
|
|
68
|
+
return { relPath, chunks, deleted: false };
|
|
69
|
+
}));
|
|
70
|
+
for (const { relPath, chunks, deleted } of batchResults) {
|
|
71
|
+
if (deleted) {
|
|
72
|
+
deletedPaths.push(relPath);
|
|
73
|
+
skippedFiles++;
|
|
74
|
+
}
|
|
75
|
+
else {
|
|
76
|
+
allChunks.push(...chunks);
|
|
77
|
+
processedPaths.push(relPath);
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
// Step 2: embed and insert chunks
|
|
82
|
+
const batchSize = config.embeddingBatchSize;
|
|
83
|
+
let totalChunks = 0;
|
|
84
|
+
for (let i = 0; i < allChunks.length; i += batchSize) {
|
|
85
|
+
const batch = allChunks.slice(i, i + batchSize);
|
|
86
|
+
const texts = batch.map((c) => c.content);
|
|
87
|
+
const vectors = await embedding.embedBatch(texts);
|
|
88
|
+
const documents = batch.map((chunk, j) => ({
|
|
89
|
+
id: randomUUID(),
|
|
90
|
+
content: chunk.content,
|
|
91
|
+
vector: vectors[j],
|
|
92
|
+
relativePath: chunk.filePath,
|
|
93
|
+
startLine: chunk.startLine,
|
|
94
|
+
endLine: chunk.endLine,
|
|
95
|
+
fileExtension: path.extname(chunk.filePath),
|
|
96
|
+
language: chunk.language,
|
|
97
|
+
fileCategory: classifyFileCategory(chunk.filePath),
|
|
98
|
+
symbolName: chunk.symbolName,
|
|
99
|
+
symbolKind: chunk.symbolKind,
|
|
100
|
+
symbolSignature: chunk.symbolSignature,
|
|
101
|
+
parentSymbol: chunk.parentSymbol,
|
|
102
|
+
}));
|
|
103
|
+
await vectordb.insert(collectionName, documents);
|
|
104
|
+
totalChunks += batch.length;
|
|
105
|
+
}
|
|
106
|
+
// Step 3: update snapshot — refresh hashes for processed files, remove deleted
|
|
107
|
+
const snapshot = loadSnapshot(normalizedRoot);
|
|
108
|
+
if (snapshot) {
|
|
109
|
+
const freshSnapshot = buildSnapshot(normalizedRoot, processedPaths);
|
|
110
|
+
for (const relPath of processedPaths) {
|
|
111
|
+
if (freshSnapshot[relPath]) {
|
|
112
|
+
snapshot[relPath] = freshSnapshot[relPath];
|
|
113
|
+
}
|
|
114
|
+
}
|
|
115
|
+
for (const relPath of deletedPaths) {
|
|
116
|
+
Reflect.deleteProperty(snapshot, relPath);
|
|
117
|
+
}
|
|
118
|
+
saveSnapshot(normalizedRoot, snapshot);
|
|
119
|
+
}
|
|
120
|
+
return {
|
|
121
|
+
processedFiles: processedPaths.length,
|
|
122
|
+
totalChunks,
|
|
123
|
+
skippedFiles,
|
|
124
|
+
durationMs: Date.now() - start,
|
|
125
|
+
};
|
|
126
|
+
}
|
|
127
|
+
//# sourceMappingURL=targeted-indexer.js.map
|
|
@@ -1,17 +1,4 @@
|
|
|
1
1
|
import type { Config } from '../config.js';
|
|
2
2
|
import type { Embedding } from './types.js';
|
|
3
|
-
/**
|
|
4
|
-
* Create an Embedding instance based on the configured provider.
|
|
5
|
-
*
|
|
6
|
-
* - 'openai' Uses the OpenAI API directly (requires OPENAI_API_KEY).
|
|
7
|
-
* - 'ollama' Uses Ollama's OpenAI-compatible /v1/embeddings endpoint.
|
|
8
|
-
* No API key required; defaults to model "nomic-embed-text".
|
|
9
|
-
* - 'local' Uses any OpenAI-compatible server at OPENAI_BASE_URL.
|
|
10
|
-
* Useful for LM Studio, vLLM, LocalAI, etc.
|
|
11
|
-
*
|
|
12
|
-
* The key insight is that Ollama and most local servers expose an
|
|
13
|
-
* OpenAI-compatible embeddings API, so we reuse OpenAIEmbedding
|
|
14
|
-
* with different connection parameters rather than creating separate classes.
|
|
15
|
-
*/
|
|
16
3
|
export declare function createEmbedding(config: Config): Embedding;
|
|
17
4
|
//# sourceMappingURL=factory.d.ts.map
|
|
@@ -1,32 +1,15 @@
|
|
|
1
1
|
import { OpenAIEmbedding } from './openai.js';
|
|
2
|
-
/**
|
|
3
|
-
* Create an Embedding instance based on the configured provider.
|
|
4
|
-
*
|
|
5
|
-
* - 'openai' Uses the OpenAI API directly (requires OPENAI_API_KEY).
|
|
6
|
-
* - 'ollama' Uses Ollama's OpenAI-compatible /v1/embeddings endpoint.
|
|
7
|
-
* No API key required; defaults to model "nomic-embed-text".
|
|
8
|
-
* - 'local' Uses any OpenAI-compatible server at OPENAI_BASE_URL.
|
|
9
|
-
* Useful for LM Studio, vLLM, LocalAI, etc.
|
|
10
|
-
*
|
|
11
|
-
* The key insight is that Ollama and most local servers expose an
|
|
12
|
-
* OpenAI-compatible embeddings API, so we reuse OpenAIEmbedding
|
|
13
|
-
* with different connection parameters rather than creating separate classes.
|
|
14
|
-
*/
|
|
15
2
|
export function createEmbedding(config) {
|
|
16
3
|
switch (config.embeddingProvider) {
|
|
17
4
|
case 'openai':
|
|
18
5
|
return new OpenAIEmbedding();
|
|
19
6
|
case 'ollama':
|
|
20
|
-
// Ollama exposes OpenAI-compatible /v1/embeddings endpoint.
|
|
21
|
-
// It ignores the API key but the OpenAI SDK requires a non-empty string.
|
|
22
7
|
return new OpenAIEmbedding({
|
|
23
8
|
apiKey: config.openaiApiKey || 'ollama',
|
|
24
9
|
baseUrl: config.ollamaBaseUrl,
|
|
25
10
|
model: config.embeddingModel,
|
|
26
11
|
});
|
|
27
12
|
case 'local':
|
|
28
|
-
// Generic OpenAI-compatible endpoint (LM Studio, vLLM, LocalAI, etc.).
|
|
29
|
-
// API key is optional (many local servers skip auth).
|
|
30
13
|
return new OpenAIEmbedding({
|
|
31
14
|
apiKey: config.openaiApiKey || 'local',
|
|
32
15
|
baseUrl: config.openaiBaseUrl,
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { type Embedding, type EmbeddingVector } from './types.js';
|
|
1
|
+
import { type Embedding, type EmbeddingVector, type TokenEstimate } from './types.js';
|
|
2
2
|
export declare function contentHash(text: string): string;
|
|
3
3
|
export interface OpenAIEmbeddingOptions {
|
|
4
4
|
apiKey?: string;
|
|
@@ -22,19 +22,7 @@ export declare class OpenAIEmbedding implements Embedding {
|
|
|
22
22
|
private ensureInitialized;
|
|
23
23
|
embed(text: string): Promise<EmbeddingVector>;
|
|
24
24
|
embedBatch(texts: string[]): Promise<EmbeddingVector[]>;
|
|
25
|
-
|
|
26
|
-
* Estimate the token cost for embedding a set of texts.
|
|
27
|
-
* Rough heuristic: ~4 chars per token for code.
|
|
28
|
-
* Cost rates are model-specific; local models (Ollama, etc.) are free.
|
|
29
|
-
*/
|
|
30
|
-
estimateTokens(texts: string[]): {
|
|
31
|
-
totalChars: number;
|
|
32
|
-
estimatedTokens: number;
|
|
33
|
-
estimatedCostUsd: number;
|
|
34
|
-
};
|
|
35
|
-
/**
|
|
36
|
-
* Set a value in the memory cache, evicting the oldest entry if at capacity.
|
|
37
|
-
*/
|
|
25
|
+
estimateTokens(texts: string[]): TokenEstimate;
|
|
38
26
|
private setMemoryCache;
|
|
39
27
|
private callWithRetry;
|
|
40
28
|
private callApi;
|
package/dist/embedding/openai.js
CHANGED
|
@@ -115,21 +115,14 @@ export class OpenAIEmbedding {
|
|
|
115
115
|
const hash = contentHash(texts[idx]);
|
|
116
116
|
const vec = freshEmbeddings[i];
|
|
117
117
|
this.setMemoryCache(hash, vec);
|
|
118
|
-
// Fire-and-forget: don't await the disk write
|
|
119
118
|
this.writeDiskCache(hash, vec);
|
|
120
119
|
results[idx] = vec;
|
|
121
120
|
}
|
|
122
|
-
|
|
123
|
-
if (results.some(r => r === null)) {
|
|
121
|
+
if (results.some((r) => r === null)) {
|
|
124
122
|
throw new EmbeddingError('Missing embeddings: some texts did not receive vectors after cache lookup and API call.');
|
|
125
123
|
}
|
|
126
124
|
return results;
|
|
127
125
|
}
|
|
128
|
-
/**
|
|
129
|
-
* Estimate the token cost for embedding a set of texts.
|
|
130
|
-
* Rough heuristic: ~4 chars per token for code.
|
|
131
|
-
* Cost rates are model-specific; local models (Ollama, etc.) are free.
|
|
132
|
-
*/
|
|
133
126
|
estimateTokens(texts) {
|
|
134
127
|
const totalChars = texts.reduce((sum, t) => sum + t.length, 0);
|
|
135
128
|
const estimatedTokens = Math.ceil(totalChars / 4);
|
|
@@ -137,15 +130,12 @@ export class OpenAIEmbedding {
|
|
|
137
130
|
const COST_PER_MILLION = {
|
|
138
131
|
'text-embedding-3-small': 0.02,
|
|
139
132
|
'text-embedding-3-large': 0.13,
|
|
140
|
-
'text-embedding-ada-002': 0.
|
|
133
|
+
'text-embedding-ada-002': 0.1,
|
|
141
134
|
};
|
|
142
135
|
const rate = COST_PER_MILLION[this.model] ?? 0;
|
|
143
136
|
const estimatedCostUsd = (estimatedTokens / 1_000_000) * rate;
|
|
144
137
|
return { totalChars, estimatedTokens, estimatedCostUsd };
|
|
145
138
|
}
|
|
146
|
-
/**
|
|
147
|
-
* Set a value in the memory cache, evicting the oldest entry if at capacity.
|
|
148
|
-
*/
|
|
149
139
|
setMemoryCache(hash, vec) {
|
|
150
140
|
if (this.memoryCache.size >= MAX_MEMORY_CACHE_SIZE && !this.memoryCache.has(hash)) {
|
|
151
141
|
// Delete the oldest entry (first key from the iterator)
|
|
@@ -192,7 +182,6 @@ export class OpenAIEmbedding {
|
|
|
192
182
|
await sleep(delay);
|
|
193
183
|
}
|
|
194
184
|
}
|
|
195
|
-
// Unreachable but satisfies TypeScript compiler
|
|
196
185
|
throw new EmbeddingError('Unexpected: exhausted retries');
|
|
197
186
|
}
|
|
198
187
|
async callApi(texts) {
|
|
@@ -200,9 +189,8 @@ export class OpenAIEmbedding {
|
|
|
200
189
|
model: this.model,
|
|
201
190
|
input: texts.map(truncateToSafeLength),
|
|
202
191
|
});
|
|
203
|
-
// Sort by index to guarantee order (API may return out-of-order)
|
|
204
192
|
const sorted = response.data.sort((a, b) => a.index - b.index);
|
|
205
|
-
return sorted.map(d => d.embedding);
|
|
193
|
+
return sorted.map((d) => d.embedding);
|
|
206
194
|
}
|
|
207
195
|
getDiskCachePath(hash) {
|
|
208
196
|
// Shard into subdirectories to avoid too many files in one dir
|
|
@@ -230,14 +218,13 @@ export class OpenAIEmbedding {
|
|
|
230
218
|
writeDiskCache(hash, vector) {
|
|
231
219
|
const filepath = this.getDiskCachePath(hash);
|
|
232
220
|
// Fire-and-forget async write
|
|
233
|
-
fsp
|
|
221
|
+
fsp
|
|
222
|
+
.mkdir(path.dirname(filepath), { recursive: true })
|
|
234
223
|
.then(() => fsp.writeFile(filepath, JSON.stringify(vector)))
|
|
235
|
-
.catch(() => {
|
|
236
|
-
// Non-fatal: cache write failure doesn't block indexing
|
|
237
|
-
});
|
|
224
|
+
.catch(() => { });
|
|
238
225
|
}
|
|
239
226
|
}
|
|
240
227
|
function sleep(ms) {
|
|
241
|
-
return new Promise(resolve => setTimeout(resolve, ms));
|
|
228
|
+
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
242
229
|
}
|
|
243
230
|
//# sourceMappingURL=openai.js.map
|
package/dist/errors.d.ts
CHANGED
package/dist/errors.js
CHANGED
package/dist/format.d.ts
CHANGED
|
@@ -1,12 +1,24 @@
|
|
|
1
|
+
import type { CleanupResult } from './core/cleanup.js';
|
|
1
2
|
import type { PreviewResult, IndexResult } from './core/indexer.js';
|
|
3
|
+
import type { DocIndexResult } from './core/doc-indexer.js';
|
|
4
|
+
import type { DocSearchResult } from './core/doc-searcher.js';
|
|
2
5
|
import type { CodebaseState } from './state/snapshot.js';
|
|
6
|
+
import type { MemoryItem, MemoryAction } from './memory/types.js';
|
|
7
|
+
import type { HistoryEntry } from './memory/history.js';
|
|
3
8
|
export declare function textResult(text: string): {
|
|
4
9
|
content: {
|
|
5
10
|
type: "text";
|
|
6
11
|
text: string;
|
|
7
12
|
}[];
|
|
8
13
|
};
|
|
14
|
+
export declare function formatCleanupResult(result: CleanupResult, normalizedPath: string, dryRun: boolean): string;
|
|
9
15
|
export declare function formatIndexResult(result: IndexResult, normalizedPath: string): string;
|
|
10
16
|
export declare function formatPreview(preview: PreviewResult, rootPath: string): string;
|
|
11
17
|
export declare function formatListIndexed(states: CodebaseState[]): string;
|
|
18
|
+
export declare function formatDocIndexResult(result: DocIndexResult): string;
|
|
19
|
+
export declare function formatDocSearchResults(results: DocSearchResult[], query: string): string;
|
|
20
|
+
export declare function formatMemoryActions(actions: MemoryAction[]): string;
|
|
21
|
+
export declare function formatMemorySearchResults(items: MemoryItem[], query: string): string;
|
|
22
|
+
export declare function formatMemoryList(items: MemoryItem[]): string;
|
|
23
|
+
export declare function formatMemoryHistory(entries: HistoryEntry[], memoryId: string): string;
|
|
12
24
|
//# sourceMappingURL=format.d.ts.map
|