@ez-corp/ez-search 1.0.3 → 1.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -235,6 +235,8 @@ export async function runIndex(targetPath, options) {
|
|
|
235
235
|
// 3. Handle --clear
|
|
236
236
|
// rmSync removes .ez-search/ entirely (including manifest.json inside it)
|
|
237
237
|
if (options.clear) {
|
|
238
|
+
col768.close();
|
|
239
|
+
col512.close();
|
|
238
240
|
rmSync(storagePath, { recursive: true, force: true });
|
|
239
241
|
const reopened = openProjectCollections(absPath);
|
|
240
242
|
col768 = reopened.col768;
|
|
@@ -390,11 +392,13 @@ export async function runIndex(targetPath, options) {
|
|
|
390
392
|
const format = options.format === 'text' ? 'text' : 'json';
|
|
391
393
|
emitError({ code: 'EMPTY_DIR', message: 'No supported files found in directory', suggestion: 'Ensure the directory contains supported file types (.ts, .js, .py, .go, .rs, .c, .cpp, .md, .txt, .jpg, .png, .webp)' }, format);
|
|
392
394
|
}
|
|
393
|
-
// 6. Optimize THEN save manifest
|
|
395
|
+
// 6. Optimize, close collections, THEN save manifest
|
|
394
396
|
col768.optimize();
|
|
397
|
+
col768.close();
|
|
395
398
|
if (imageFilesProcessed) {
|
|
396
399
|
col512.optimize();
|
|
397
400
|
}
|
|
401
|
+
col512.close();
|
|
398
402
|
saveManifest(absPath, manifest);
|
|
399
403
|
// 7. Output results
|
|
400
404
|
const durationMs = Date.now() - startTime;
|
|
@@ -53,176 +53,181 @@ export async function runQuery(text, options) {
|
|
|
53
53
|
staleFileCount = await calcStaleness(projectDir, manifest, true);
|
|
54
54
|
}
|
|
55
55
|
const isStale = staleFileCount > 0;
|
|
56
|
-
// 3. Open
|
|
57
|
-
const {
|
|
58
|
-
const
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
const
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
56
|
+
// 3. Open only the text/code collection (query never needs col-512 for images)
|
|
57
|
+
const { openCollection } = await import('../../services/vector-db.js');
|
|
58
|
+
const col768 = openCollection(projectDir, 'col-768');
|
|
59
|
+
try {
|
|
60
|
+
let typesToQuery;
|
|
61
|
+
if (options.type) {
|
|
62
|
+
typesToQuery = [options.type];
|
|
63
|
+
}
|
|
64
|
+
else {
|
|
65
|
+
// Pre-detect indexed types from manifest: only load models for types that have data.
|
|
66
|
+
// This avoids loading Jina when only text is indexed (or Nomic when only code is indexed).
|
|
67
|
+
const { EXTENSION_MAP } = await import('../../types.js');
|
|
68
|
+
const indexedTypes = new Set();
|
|
69
|
+
for (const filePath of Object.keys(manifest.files)) {
|
|
70
|
+
const ext = '.' + filePath.split('.').pop()?.toLowerCase();
|
|
71
|
+
const fileType = EXTENSION_MAP[ext];
|
|
72
|
+
if (fileType)
|
|
73
|
+
indexedTypes.add(fileType);
|
|
74
|
+
}
|
|
75
|
+
typesToQuery = [];
|
|
76
|
+
if (indexedTypes.has('code'))
|
|
77
|
+
typesToQuery.push('code');
|
|
78
|
+
if (indexedTypes.has('text'))
|
|
79
|
+
typesToQuery.push('text');
|
|
80
|
+
// image queries from text not supported — skip even if images are indexed
|
|
81
|
+
}
|
|
82
|
+
// Early exit when manifest exists but has no queryable types (e.g., after --clear without re-indexing)
|
|
83
|
+
if (typesToQuery.length === 0) {
|
|
84
|
+
const { emitError } = await import('../errors.js');
|
|
85
|
+
emitError({ code: 'NO_INDEX', message: 'No indexed content found', suggestion: 'Run `ez-search index .` first' }, options.format === 'text' ? 'text' : 'json');
|
|
73
86
|
}
|
|
74
|
-
|
|
75
|
-
if (
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
// ── Helpers ──────────────────────────────────────────────────────────────
|
|
96
|
-
const { normalizeResults, filterAndCollapse } = await import('../../services/query-utils.js');
|
|
97
|
-
const hasPostFilters = options.dir !== undefined || threshold !== undefined;
|
|
98
|
-
// Over-fetch for mixed col-768 + optional post-filters
|
|
99
|
-
const fetchCount = topK * 5 * (hasPostFilters ? 3 : 1);
|
|
100
|
-
// ── Execute per-type queries sequentially (memory conservation) ──────────
|
|
101
|
-
const { createEmbeddingPipeline } = await import('../../services/model-router.js');
|
|
102
|
-
let codeResults = [];
|
|
103
|
-
let textResults = [];
|
|
104
|
-
if (typesToQuery.includes('code')) {
|
|
105
|
-
// Code: Jina embedding, filter for jina modelId
|
|
106
|
-
let pipe = null;
|
|
107
|
-
try {
|
|
108
|
-
pipe = await createEmbeddingPipeline('code');
|
|
109
|
-
const [queryEmbedding] = await pipe.embed([text]);
|
|
110
|
-
let rawResults;
|
|
87
|
+
// Handle unsupported image query
|
|
88
|
+
if (options.type === 'image') {
|
|
89
|
+
const { emitError } = await import('../errors.js');
|
|
90
|
+
emitError({
|
|
91
|
+
code: 'UNSUPPORTED_TYPE',
|
|
92
|
+
message: 'Image search requires image query input (not yet supported)',
|
|
93
|
+
suggestion: 'Omit --type image to search code and text',
|
|
94
|
+
}, options.format === 'text' ? 'text' : 'json');
|
|
95
|
+
}
|
|
96
|
+
// ── Helpers ──────────────────────────────────────────────────────────────
|
|
97
|
+
const { normalizeResults, filterAndCollapse } = await import('../../services/query-utils.js');
|
|
98
|
+
const hasPostFilters = options.dir !== undefined || threshold !== undefined;
|
|
99
|
+
// Over-fetch for mixed col-768 + optional post-filters
|
|
100
|
+
const fetchCount = topK * 5 * (hasPostFilters ? 3 : 1);
|
|
101
|
+
// ── Execute per-type queries sequentially (memory conservation) ──────────
|
|
102
|
+
const { createEmbeddingPipeline } = await import('../../services/model-router.js');
|
|
103
|
+
let codeResults = [];
|
|
104
|
+
let textResults = [];
|
|
105
|
+
if (typesToQuery.includes('code')) {
|
|
106
|
+
// Code: Jina embedding, filter for jina modelId
|
|
107
|
+
let pipe = null;
|
|
111
108
|
try {
|
|
112
|
-
|
|
109
|
+
pipe = await createEmbeddingPipeline('code');
|
|
110
|
+
const [queryEmbedding] = await pipe.embed([text]);
|
|
111
|
+
let rawResults;
|
|
112
|
+
try {
|
|
113
|
+
rawResults = col768.query(queryEmbedding, fetchCount);
|
|
114
|
+
}
|
|
115
|
+
catch {
|
|
116
|
+
rawResults = [];
|
|
117
|
+
}
|
|
118
|
+
const normalized = normalizeResults(rawResults);
|
|
119
|
+
codeResults = filterAndCollapse(normalized, (id) => id.includes('jina') || id.startsWith('jinaai/'), { threshold, dir: options.dir, topK });
|
|
113
120
|
}
|
|
114
|
-
catch {
|
|
115
|
-
|
|
121
|
+
catch (err) {
|
|
122
|
+
process.stderr.write(`[query] code pipeline error: ${err instanceof Error ? err.message : String(err)}\n`);
|
|
123
|
+
}
|
|
124
|
+
finally {
|
|
125
|
+
if (pipe)
|
|
126
|
+
await pipe.dispose();
|
|
116
127
|
}
|
|
117
|
-
const normalized = normalizeResults(rawResults);
|
|
118
|
-
codeResults = filterAndCollapse(normalized, (id) => id.includes('jina') || id.startsWith('jinaai/'), { threshold, dir: options.dir, topK });
|
|
119
|
-
}
|
|
120
|
-
catch (err) {
|
|
121
|
-
process.stderr.write(`[query] code pipeline error: ${err instanceof Error ? err.message : String(err)}\n`);
|
|
122
|
-
}
|
|
123
|
-
finally {
|
|
124
|
-
if (pipe)
|
|
125
|
-
await pipe.dispose();
|
|
126
128
|
}
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
let pipe = null;
|
|
131
|
-
try {
|
|
132
|
-
pipe = await createEmbeddingPipeline('text');
|
|
133
|
-
const prefixedQuery = `search_query: ${text}`;
|
|
134
|
-
const [queryEmbedding] = await pipe.embed([prefixedQuery]);
|
|
135
|
-
let rawResults;
|
|
129
|
+
if (typesToQuery.includes('text')) {
|
|
130
|
+
// Text: Nomic embedding with "search_query: " prefix, filter for nomic modelId
|
|
131
|
+
let pipe = null;
|
|
136
132
|
try {
|
|
137
|
-
|
|
133
|
+
pipe = await createEmbeddingPipeline('text');
|
|
134
|
+
const prefixedQuery = `search_query: ${text}`;
|
|
135
|
+
const [queryEmbedding] = await pipe.embed([prefixedQuery]);
|
|
136
|
+
let rawResults;
|
|
137
|
+
try {
|
|
138
|
+
rawResults = col768.query(queryEmbedding, fetchCount);
|
|
139
|
+
}
|
|
140
|
+
catch {
|
|
141
|
+
rawResults = [];
|
|
142
|
+
}
|
|
143
|
+
const normalized = normalizeResults(rawResults);
|
|
144
|
+
textResults = filterAndCollapse(normalized, (id) => id.includes('nomic'), { threshold, dir: options.dir, topK });
|
|
138
145
|
}
|
|
139
|
-
catch {
|
|
140
|
-
|
|
146
|
+
catch (err) {
|
|
147
|
+
process.stderr.write(`[query] text pipeline error: ${err instanceof Error ? err.message : String(err)}\n`);
|
|
148
|
+
}
|
|
149
|
+
finally {
|
|
150
|
+
if (pipe)
|
|
151
|
+
await pipe.dispose();
|
|
141
152
|
}
|
|
142
|
-
const normalized = normalizeResults(rawResults);
|
|
143
|
-
textResults = filterAndCollapse(normalized, (id) => id.includes('nomic'), { threshold, dir: options.dir, topK });
|
|
144
|
-
}
|
|
145
|
-
catch (err) {
|
|
146
|
-
process.stderr.write(`[query] text pipeline error: ${err instanceof Error ? err.message : String(err)}\n`);
|
|
147
|
-
}
|
|
148
|
-
finally {
|
|
149
|
-
if (pipe)
|
|
150
|
-
await pipe.dispose();
|
|
151
|
-
}
|
|
152
|
-
}
|
|
153
|
-
// ── Output ────────────────────────────────────────────────────────────────
|
|
154
|
-
const hasCodeResults = codeResults.length > 0;
|
|
155
|
-
const hasTextResults = textResults.length > 0;
|
|
156
|
-
const hasResults = hasCodeResults || hasTextResults;
|
|
157
|
-
if (options.format === 'text') {
|
|
158
|
-
if (autoIndexResult) {
|
|
159
|
-
console.log(`Auto-indexed ${autoIndexResult.filesIndexed} files in ${(autoIndexResult.durationMs / 1000).toFixed(1)}s\n`);
|
|
160
|
-
}
|
|
161
|
-
if (isStale) {
|
|
162
|
-
console.log(`Warning: ${staleFileCount} file(s) changed since last index. Run \`ez-search index .\` to update.\n`);
|
|
163
|
-
}
|
|
164
|
-
if (!hasResults) {
|
|
165
|
-
console.log('No results found.');
|
|
166
|
-
return;
|
|
167
153
|
}
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
154
|
+
// ── Output ────────────────────────────────────────────────────────────────
|
|
155
|
+
const hasCodeResults = codeResults.length > 0;
|
|
156
|
+
const hasTextResults = textResults.length > 0;
|
|
157
|
+
const hasResults = hasCodeResults || hasTextResults;
|
|
158
|
+
if (options.format === 'text') {
|
|
159
|
+
if (autoIndexResult) {
|
|
160
|
+
console.log(`Auto-indexed ${autoIndexResult.filesIndexed} files in ${(autoIndexResult.durationMs / 1000).toFixed(1)}s\n`);
|
|
161
|
+
}
|
|
162
|
+
if (isStale) {
|
|
163
|
+
console.log(`Warning: ${staleFileCount} file(s) changed since last index. Run \`ez-search index .\` to update.\n`);
|
|
164
|
+
}
|
|
165
|
+
if (!hasResults) {
|
|
166
|
+
console.log('No results found.');
|
|
167
|
+
return;
|
|
168
|
+
}
|
|
169
|
+
if (hasCodeResults) {
|
|
170
|
+
console.log('## Code\n');
|
|
171
|
+
for (const r of codeResults) {
|
|
172
|
+
console.log(`File: ${r.filePath} | Lines: ${r.lineStart}-${r.lineEnd} | Relevance: ${r.score}`);
|
|
173
|
+
for (const line of r.chunkText.split('\n')) {
|
|
174
|
+
console.log(` ${line}`);
|
|
175
|
+
}
|
|
176
|
+
console.log();
|
|
174
177
|
}
|
|
175
|
-
console.log();
|
|
176
178
|
}
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
179
|
+
if (hasTextResults) {
|
|
180
|
+
console.log('## Text\n');
|
|
181
|
+
for (const r of textResults) {
|
|
182
|
+
console.log(`File: ${r.filePath} | Relevance: ${r.score}`);
|
|
183
|
+
for (const line of r.chunkText.split('\n')) {
|
|
184
|
+
console.log(` ${line}`);
|
|
185
|
+
}
|
|
186
|
+
console.log();
|
|
184
187
|
}
|
|
185
|
-
console.log();
|
|
186
188
|
}
|
|
187
189
|
}
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
searchScope: options.dir ?? '.',
|
|
195
|
-
};
|
|
196
|
-
if (autoIndexResult) {
|
|
197
|
-
output['indexing'] = {
|
|
198
|
-
status: autoIndexResult.status,
|
|
199
|
-
filesIndexed: autoIndexResult.filesIndexed,
|
|
200
|
-
durationMs: autoIndexResult.durationMs,
|
|
190
|
+
else {
|
|
191
|
+
// JSON grouped envelope
|
|
192
|
+
const output = {
|
|
193
|
+
query: text,
|
|
194
|
+
totalIndexed,
|
|
195
|
+
searchScope: options.dir ?? '.',
|
|
201
196
|
};
|
|
197
|
+
if (autoIndexResult) {
|
|
198
|
+
output['indexing'] = {
|
|
199
|
+
status: autoIndexResult.status,
|
|
200
|
+
filesIndexed: autoIndexResult.filesIndexed,
|
|
201
|
+
durationMs: autoIndexResult.durationMs,
|
|
202
|
+
};
|
|
203
|
+
}
|
|
204
|
+
if (isStale) {
|
|
205
|
+
output['stale'] = true;
|
|
206
|
+
output['staleFileCount'] = staleFileCount;
|
|
207
|
+
}
|
|
208
|
+
if (hasCodeResults) {
|
|
209
|
+
output['code'] = codeResults.map((r) => ({
|
|
210
|
+
file: r.filePath,
|
|
211
|
+
lines: { start: r.lineStart, end: r.lineEnd },
|
|
212
|
+
score: r.score,
|
|
213
|
+
text: r.chunkText,
|
|
214
|
+
}));
|
|
215
|
+
}
|
|
216
|
+
if (hasTextResults) {
|
|
217
|
+
output['text'] = textResults.map((r) => ({
|
|
218
|
+
file: r.filePath,
|
|
219
|
+
score: r.score,
|
|
220
|
+
text: r.chunkText,
|
|
221
|
+
}));
|
|
222
|
+
}
|
|
223
|
+
if (!hasResults) {
|
|
224
|
+
output['message'] = 'No results found';
|
|
225
|
+
}
|
|
226
|
+
console.log(JSON.stringify(output));
|
|
202
227
|
}
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
}
|
|
207
|
-
if (hasCodeResults) {
|
|
208
|
-
output['code'] = codeResults.map((r) => ({
|
|
209
|
-
file: r.filePath,
|
|
210
|
-
lines: { start: r.lineStart, end: r.lineEnd },
|
|
211
|
-
score: r.score,
|
|
212
|
-
text: r.chunkText,
|
|
213
|
-
}));
|
|
214
|
-
}
|
|
215
|
-
if (hasTextResults) {
|
|
216
|
-
output['text'] = textResults.map((r) => ({
|
|
217
|
-
file: r.filePath,
|
|
218
|
-
score: r.score,
|
|
219
|
-
text: r.chunkText,
|
|
220
|
-
}));
|
|
221
|
-
}
|
|
222
|
-
if (!hasResults) {
|
|
223
|
-
output['message'] = 'No results found';
|
|
224
|
-
}
|
|
225
|
-
console.log(JSON.stringify(output));
|
|
228
|
+
}
|
|
229
|
+
finally {
|
|
230
|
+
col768.close();
|
|
226
231
|
}
|
|
227
232
|
}
|
|
228
233
|
catch (err) {
|
|
@@ -101,7 +101,11 @@ function createCollection(storageDir, name, dim) {
|
|
|
101
101
|
chunkText: String(metadata['chunkText'] ?? ''),
|
|
102
102
|
},
|
|
103
103
|
};
|
|
104
|
-
|
|
104
|
+
let status = handle.insertSync(doc);
|
|
105
|
+
if (!status.ok && status.code === 'ZVEC_ALREADY_EXISTS') {
|
|
106
|
+
handle.deleteSync(id);
|
|
107
|
+
status = handle.insertSync(doc);
|
|
108
|
+
}
|
|
105
109
|
if (!status.ok) {
|
|
106
110
|
throw new Error(`Zvec insert failed for id="${id}": code=${status.code} ${status.message}`);
|
|
107
111
|
}
|
|
@@ -136,9 +140,7 @@ function createCollection(storageDir, name, dim) {
|
|
|
136
140
|
handle.optimizeSync();
|
|
137
141
|
},
|
|
138
142
|
close() {
|
|
139
|
-
|
|
140
|
-
// Call destroySync only if you want to delete the data from disk.
|
|
141
|
-
// For normal close, we do nothing — the GC handles cleanup.
|
|
143
|
+
handle.closeSync();
|
|
142
144
|
},
|
|
143
145
|
};
|
|
144
146
|
}
|
|
@@ -159,3 +161,14 @@ export function openProjectCollections(projectDir) {
|
|
|
159
161
|
const col512 = createCollection(storageDir, 'col-512', 512);
|
|
160
162
|
return { col768, col512, storagePath: storageDir };
|
|
161
163
|
}
|
|
164
|
+
/**
|
|
165
|
+
* Open a single vector collection by name.
|
|
166
|
+
* Use this when you only need one collection (e.g. query only needs col-768).
|
|
167
|
+
*/
|
|
168
|
+
export function openCollection(projectDir, name) {
|
|
169
|
+
const storageDir = resolveProjectStoragePath(projectDir);
|
|
170
|
+
mkdirSync(storageDir, { recursive: true });
|
|
171
|
+
ensureSchemaVersion(storageDir);
|
|
172
|
+
const dim = name === 'col-768' ? 768 : 512;
|
|
173
|
+
return createCollection(storageDir, name, dim);
|
|
174
|
+
}
|