ownsearch 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +68 -0
- package/dist/chunk-NLETDGQ5.js +726 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +198 -0
- package/dist/mcp/server.d.ts +1 -0
- package/dist/mcp/server.js +242 -0
- package/package.json +61 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# ownsearch
|
|
2
|
+
|
|
3
|
+
`ownsearch` is a text-first local semantic search package that indexes folders into Qdrant and exposes retrieval tools through an MCP server.
|
|
4
|
+
|
|
5
|
+
V1 scope:
|
|
6
|
+
|
|
7
|
+
- text and code files first
|
|
8
|
+
- extracted text from PDFs
|
|
9
|
+
- Gemini embeddings with `gemini-embedding-001`
|
|
10
|
+
- Docker-backed Qdrant storage
|
|
11
|
+
- stdio MCP server for agent attachment
|
|
12
|
+
|
|
13
|
+
## Install
|
|
14
|
+
|
|
15
|
+
For local development before publish:
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
npm install
|
|
19
|
+
npm run build
|
|
20
|
+
npm link
|
|
21
|
+
```
|
|
22
|
+
|
|
23
|
+
After publish, the intended install is:
|
|
24
|
+
|
|
25
|
+
```bash
|
|
26
|
+
npm install -g ownsearch
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
## Quickstart
|
|
30
|
+
|
|
31
|
+
```bash
|
|
32
|
+
ownsearch setup
|
|
33
|
+
ownsearch doctor
|
|
34
|
+
ownsearch index ./docs --name docs
|
|
35
|
+
ownsearch list-roots
|
|
36
|
+
ownsearch search "what is this repo about?" --limit 5
|
|
37
|
+
ownsearch search-context "what is this repo about?" --limit 8 --max-chars 12000
|
|
38
|
+
ownsearch serve-mcp
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
## Agent Config
|
|
42
|
+
|
|
43
|
+
Print a config snippet for your agent:
|
|
44
|
+
|
|
45
|
+
```bash
|
|
46
|
+
node dist/cli.js print-agent-config codex
|
|
47
|
+
node dist/cli.js print-agent-config claude-desktop
|
|
48
|
+
node dist/cli.js print-agent-config cursor
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## MCP Tools
|
|
52
|
+
|
|
53
|
+
- `index_path`
|
|
54
|
+
- `search`
|
|
55
|
+
- `search_context`
|
|
56
|
+
- `get_chunks`
|
|
57
|
+
- `list_roots`
|
|
58
|
+
- `delete_root`
|
|
59
|
+
- `store_status`
|
|
60
|
+
|
|
61
|
+
`index_path` is incremental. Re-running it on the same folder only re-embeds changed files and removes stale chunks.
|
|
62
|
+
|
|
63
|
+
## Notes
|
|
64
|
+
|
|
65
|
+
- Roots are stored in `~/.ownsearch/config.json`.
|
|
66
|
+
- Qdrant runs in Docker under the container name `ownsearch-qdrant`.
|
|
67
|
+
- Each indexed folder is stored as payload-filtered chunks inside a single Qdrant collection.
|
|
68
|
+
- Agents can call `search_context` for a ready-to-use context bundle, or call `search` first and then `get_chunks` for exact chunk retrieval.
|
|
@@ -0,0 +1,726 @@
|
|
|
1
|
+
// src/context.ts
|
|
2
|
+
/**
 * Pack search hits into a context bundle bounded by a character budget.
 *
 * Hits are taken in the order given. The first hit is always included, even
 * when it alone exceeds `maxChars`; packing stops at the first later hit that
 * would push the running total past the budget.
 *
 * @param {string} query - The query the hits were retrieved for.
 * @param {Array<object>} hits - Hits exposing id, score, rootId, rootName,
 *   relativePath, chunkIndex and content.
 * @param {number} [maxChars=12e3] - Character budget for the combined content.
 * @returns {{query: string, totalChars: number, results: Array<object>}}
 */
function buildContextBundle(query, hits, maxChars = 12e3) {
  const selected = [];
  let usedChars = 0;
  for (const { id, score, rootId, rootName, relativePath, chunkIndex, content } of hits) {
    const overBudget = usedChars + content.length > maxChars;
    if (overBudget && selected.length !== 0) {
      break;
    }
    selected.push({ id, score, rootId, rootName, relativePath, chunkIndex, content });
    usedChars += content.length;
  }
  return { query, totalChars: usedChars, results: selected };
}
|
|
26
|
+
|
|
27
|
+
// src/config.ts
|
|
28
|
+
import fs from "fs/promises";
|
|
29
|
+
import os from "os";
|
|
30
|
+
import path2 from "path";
|
|
31
|
+
|
|
32
|
+
// src/constants.ts
|
|
33
|
+
// Location of the per-user configuration file (joined onto the home directory).
const CONFIG_DIR_NAME = ".ownsearch";
const CONFIG_FILE_NAME = "config.json";

// Embedding model and the Qdrant collection sized for its vectors.
const DEFAULT_COLLECTION = "text_gemini_embedding_001_768";
const DEFAULT_EMBEDDING_MODEL = "gemini-embedding-001";
const DEFAULT_VECTOR_SIZE = 768;

// Qdrant endpoint plus the Docker container and volume names used for it.
const DEFAULT_QDRANT_URL = "http://127.0.0.1:6333";
const DEFAULT_QDRANT_CONTAINER = "ownsearch-qdrant";
const DEFAULT_QDRANT_VOLUME = "ownsearch-qdrant-storage";

// Chunking defaults (characters) and the largest file considered (50 MiB).
const DEFAULT_CHUNK_SIZE = 1200;
const DEFAULT_CHUNK_OVERLAP = 200;
const DEFAULT_MAX_FILE_BYTES = 50 * 1024 * 1024;

// Lower-cased file extensions that are eligible for indexing.
const SUPPORTED_TEXT_EXTENSIONS = /* @__PURE__ */ new Set([
  ".c", ".cpp", ".cs", ".css", ".csv", ".env", ".go", ".h", ".hpp", ".html",
  ".java", ".js", ".json", ".jsx", ".md", ".mdx", ".mjs", ".pdf", ".ps1",
  ".py", ".rb", ".rs", ".sh", ".sql", ".toml", ".ts", ".tsx", ".txt", ".xml",
  ".yaml", ".yml"
]);

// Directory names that are never descended into while walking a root.
const IGNORED_DIRECTORIES = /* @__PURE__ */ new Set([
  ".git", ".hg", ".idea", ".next", ".svn", ".turbo", ".venv", ".vscode",
  "build", "coverage", "dist", "node_modules", "venv"
]);
|
|
92
|
+
|
|
93
|
+
// src/utils.ts
|
|
94
|
+
import crypto from "crypto";
|
|
95
|
+
import path from "path";
|
|
96
|
+
/**
 * Rewrite a path that uses the platform separator so it uses "/" instead.
 *
 * @param {string} inputPath - Path written with `path.sep` separators.
 * @returns {string} The same path with every separator replaced by "/".
 */
function toPosixPath(inputPath) {
  const segments = inputPath.split(path.sep);
  return segments.join("/");
}
|
|
99
|
+
/**
 * Compute the SHA-256 digest of the input.
 *
 * @param {string|Buffer} input - Data to hash.
 * @returns {string} Lower-case hexadecimal digest (64 characters).
 */
function sha256(input) {
  const hasher = crypto.createHash("sha256");
  hasher.update(input);
  return hasher.digest("hex");
}
|
|
102
|
+
/**
 * Derive a deterministic UUID-shaped identifier from an arbitrary string.
 *
 * The SHA-256 hex digest of the input supplies the digits. The third group is
 * prefixed with "5" and the first digit of the fourth group is forced into
 * the range 8-b, so the result has the textual layout of a version-5 UUID.
 *
 * @param {string} input - Value to derive the identifier from.
 * @returns {string} Identifier formatted as 8-4-4-4-12 hex digits.
 */
function hashToUuid(input) {
  const hex = sha256(input);
  // Keep the low two bits of the digit and set the high bit: 8, 9, a or b.
  const variantDigit = ((parseInt(hex.charAt(16), 16) & 3) | 8).toString(16);
  const groups = [
    hex.slice(0, 8),
    hex.slice(8, 12),
    `5${hex.slice(13, 16)}`,
    `${variantDigit}${hex.slice(17, 20)}`,
    hex.slice(20, 32)
  ];
  return groups.join("-");
}
|
|
112
|
+
/**
 * Scale a numeric vector to unit Euclidean length.
 *
 * @param {number[]} values - Vector components.
 * @returns {number[]} A new array with each component divided by the vector's
 *   magnitude, or the input array itself when the magnitude is zero.
 */
function normalizeVector(values) {
  let sumOfSquares = 0;
  values.forEach((component) => {
    sumOfSquares += component * component;
  });
  const length = Math.sqrt(sumOfSquares);
  // A zero vector has no direction; hand it back unchanged.
  return length === 0 ? values : values.map((component) => component / length);
}
|
|
119
|
+
/**
 * Turn free-form text into a lower-case slug of at most 64 characters that
 * contains only a-z, 0-9 and single dashes, with no leading or trailing dash.
 *
 * @param {string} value - Text to slugify.
 * @returns {string} The slug; empty when `value` has no a-z/0-9 characters.
 */
function slugifyName(value) {
  return value
    .trim()
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, "-")
    .replace(/^-+|-+$/g, "")
    .slice(0, 64)
    // Cutting at 64 characters can land right after a separator; trim it so
    // the slug never ends with "-".
    .replace(/-+$/, "");
}
|
|
122
|
+
|
|
123
|
+
// src/config.ts
|
|
124
|
+
/**
 * Build a fresh configuration object populated with the built-in defaults
 * and an empty list of roots.
 *
 * @returns {object} A new config object; callers may mutate it freely.
 */
function defaultConfig() {
  const defaults = {
    qdrantUrl: DEFAULT_QDRANT_URL,
    qdrantCollection: DEFAULT_COLLECTION,
    qdrantContainerName: DEFAULT_QDRANT_CONTAINER,
    qdrantVolumeName: DEFAULT_QDRANT_VOLUME,
    embeddingModel: DEFAULT_EMBEDDING_MODEL,
    vectorSize: DEFAULT_VECTOR_SIZE,
    chunkSize: DEFAULT_CHUNK_SIZE,
    chunkOverlap: DEFAULT_CHUNK_OVERLAP,
    maxFileBytes: DEFAULT_MAX_FILE_BYTES,
    roots: []
  };
  return defaults;
}
|
|
138
|
+
/**
 * Resolve the directory that holds the ownsearch configuration.
 *
 * @returns {string} `CONFIG_DIR_NAME` joined onto the current user's home directory.
 */
function getConfigDir() {
  const homeDirectory = os.homedir();
  return path2.join(homeDirectory, CONFIG_DIR_NAME);
}
|
|
141
|
+
/**
 * Resolve the full path of the configuration file.
 *
 * @returns {string} `CONFIG_FILE_NAME` inside the directory from `getConfigDir()`.
 */
function getConfigPath() {
  const directory = getConfigDir();
  return path2.join(directory, CONFIG_FILE_NAME);
}
|
|
144
|
+
/**
 * Create the configuration directory (and any missing parents) if needed.
 *
 * @returns {Promise<void>}
 */
async function ensureConfigDir() {
  const directory = getConfigDir();
  await fs.mkdir(directory, { recursive: true });
}
|
|
147
|
+
/**
 * Load the user configuration, creating it with defaults on first use.
 *
 * Behavior by case:
 * - file missing: defaults are written and returned;
 * - file unreadable for another reason: the filesystem error is rethrown
 *   rather than overwriting a file that may still be intact;
 * - file not parseable as a JSON object: it is moved aside to
 *   `<config>.corrupt-<timestamp>` (so registered roots are not lost), a
 *   warning goes to stderr, and defaults are written and returned;
 * - otherwise: defaults are overlaid with the stored values, and
 *   `maxFileBytes` is raised to at least `DEFAULT_MAX_FILE_BYTES` and
 *   persisted when that changes the stored value.
 *
 * @returns {Promise<object>} The effective configuration.
 */
async function loadConfig() {
  await ensureConfigDir();
  const configPath = getConfigPath();

  let raw;
  try {
    raw = await fs.readFile(configPath, "utf8");
  } catch (error) {
    if (error?.code !== "ENOENT") {
      throw error;
    }
    const fresh = defaultConfig();
    await saveConfig(fresh);
    return fresh;
  }

  let parsed;
  try {
    parsed = JSON.parse(raw);
    if (parsed === null || typeof parsed !== "object" || Array.isArray(parsed)) {
      throw new TypeError("top-level value is not an object");
    }
  } catch (error) {
    const backupPath = `${configPath}.corrupt-${Date.now()}`;
    await fs.rename(configPath, backupPath);
    // stderr, not stdout: stdout may be carrying the MCP stdio protocol.
    console.error(
      `[ownsearch] Config at ${configPath} could not be parsed (${error.message}); it was moved to ${backupPath} and defaults were restored.`
    );
    const fresh = defaultConfig();
    await saveConfig(fresh);
    return fresh;
  }

  const storedMaxFileBytes = Number.isFinite(parsed.maxFileBytes) ? parsed.maxFileBytes : DEFAULT_MAX_FILE_BYTES;
  const config = {
    ...defaultConfig(),
    ...parsed,
    maxFileBytes: Math.max(storedMaxFileBytes, DEFAULT_MAX_FILE_BYTES),
    roots: Array.isArray(parsed.roots) ? parsed.roots : []
  };
  // Persist outside any try/catch so a failed write surfaces as an error
  // instead of triggering a reset to defaults.
  if (config.maxFileBytes !== parsed.maxFileBytes) {
    await saveConfig(config);
  }
  return config;
}
|
|
169
|
+
/**
 * Persist the configuration as pretty-printed JSON followed by a newline.
 *
 * The content is written to a temporary sibling file and then renamed over
 * the real path, so a concurrent reader never observes a partially written
 * config file.
 *
 * @param {object} config - Configuration to serialize.
 * @returns {Promise<void>}
 * @throws Rethrows any filesystem error after removing the temporary file.
 */
async function saveConfig(config) {
  await ensureConfigDir();
  const targetPath = getConfigPath();
  const tempPath = `${targetPath}.${process.pid}.${Date.now()}.tmp`;
  const serialized = `${JSON.stringify(config, null, 2)}\n`;
  try {
    await fs.writeFile(tempPath, serialized, "utf8");
    await fs.rename(tempPath, targetPath);
  } catch (error) {
    // Best-effort cleanup; the original failure is what the caller needs.
    await fs.rm(tempPath, { force: true }).catch(() => {});
    throw error;
  }
}
|
|
174
|
+
/**
 * Build the record that describes a newly registered root folder.
 *
 * The id is a readable slug of "<name>-<path>" followed by the first 12 hex
 * digits of the SHA-256 of the resolved path. The hash suffix keeps ids
 * distinct when two long paths share their first characters (the slug alone
 * is truncated) and when the name and path contain no a-z/0-9 characters
 * (the slug alone would be empty). The id never exceeds 64 characters.
 *
 * @param {string} rootPath - Folder to register; resolved to an absolute path.
 * @param {string} [name] - Display name; defaults to the folder's base name.
 * @returns {{id: string, name: string, path: string, createdAt: string, updatedAt: string}}
 */
function createRootDefinition(rootPath, name) {
  const now = (/* @__PURE__ */ new Date()).toISOString();
  const resolvedPath = path2.resolve(rootPath);
  const rootName = name?.trim() || path2.basename(resolvedPath);
  const digest = sha256(resolvedPath).slice(0, 12);
  // 51 slug chars + "-" + 12 digest chars = 64.
  const slug = slugifyName(`${rootName}-${resolvedPath}`).slice(0, 51).replace(/-+$/, "");
  return {
    id: slug ? `${slug}-${digest}` : digest,
    name: rootName,
    path: resolvedPath,
    createdAt: now,
    updatedAt: now
  };
}
|
|
185
|
+
/**
 * Register a root folder, or refresh the entry already stored for its path.
 *
 * Roots are matched by resolved absolute path. An existing entry keeps its id
 * and gets a new `updatedAt` (and a new name when a non-blank one is given);
 * otherwise a new definition is appended. The config is saved in both cases.
 *
 * @param {string} rootPath - Folder to register.
 * @param {string} [name] - Optional display name.
 * @returns {Promise<object>} The stored root definition.
 */
async function upsertRoot(rootPath, name) {
  const config = await loadConfig();
  const targetPath = path2.resolve(rootPath);
  const match = config.roots.find((candidate) => candidate.path === targetPath);
  const timestamp = (/* @__PURE__ */ new Date()).toISOString();
  if (!match) {
    const created = createRootDefinition(targetPath, name);
    config.roots.push(created);
    await saveConfig(config);
    return created;
  }
  match.name = name?.trim() || match.name;
  match.updatedAt = timestamp;
  await saveConfig(config);
  return match;
}
|
|
201
|
+
/**
 * Remove the root with the given id from the stored configuration.
 *
 * The config is saved whether or not a root matched.
 *
 * @param {string} rootId - Id of the root to remove.
 * @returns {Promise<boolean>} True when a root was removed.
 */
async function deleteRootDefinition(rootId) {
  const config = await loadConfig();
  const remaining = config.roots.filter(({ id }) => id !== rootId);
  const removed = remaining.length !== config.roots.length;
  config.roots = remaining;
  await saveConfig(config);
  return removed;
}
|
|
208
|
+
/**
 * Look up a registered root by id.
 *
 * @param {string} rootId - Id to search for.
 * @returns {Promise<object|undefined>} The root definition, or undefined when absent.
 */
async function findRoot(rootId) {
  const { roots } = await loadConfig();
  return roots.find((candidate) => candidate.id === rootId);
}
|
|
212
|
+
/**
 * Return every root stored in the configuration.
 *
 * @returns {Promise<object[]>} The `roots` array of the loaded config.
 */
async function listRoots() {
  const { roots } = await loadConfig();
  return roots;
}
|
|
216
|
+
|
|
217
|
+
// src/errors.ts
|
|
218
|
+
/**
 * Error type for failures that ownsearch raises itself (missing API key,
 * unusable path, Docker problems, ...).
 */
var OwnSearchError = class extends Error {
  /**
   * @param {string} message - Human-readable description of the failure.
   * @param {{cause?: unknown}} [options] - Standard `Error` options; pass
   *   `{ cause }` to keep the underlying error attached.
   */
  constructor(message, options) {
    super(message, options);
    this.name = "OwnSearchError";
  }
};
|
|
224
|
+
|
|
225
|
+
// src/gemini.ts
|
|
226
|
+
import { GoogleGenAI } from "@google/genai";
|
|
227
|
+
// Lazily created Gemini client shared by every embedding call.
var client;
// Number of texts sent per embedContent request.
var MAX_EMBED_BATCH_SIZE = 20;

/**
 * Return the shared Gemini client, creating it on first use.
 *
 * @returns {GoogleGenAI} The cached client instance.
 * @throws {OwnSearchError} When `GEMINI_API_KEY` is not set in the environment.
 */
function getClient() {
  const { GEMINI_API_KEY: apiKey } = process.env;
  if (!apiKey) {
    throw new OwnSearchError("GEMINI_API_KEY is required.");
  }
  if (client === undefined) {
    client = new GoogleGenAI({ apiKey });
  }
  return client;
}
|
|
239
|
+
/**
 * Embed texts with the configured Gemini model, in batches of
 * `MAX_EMBED_BATCH_SIZE`, and return one unit-length vector per input.
 *
 * @param {string[]} contents - Texts to embed, in order.
 * @param {string} taskType - Task type forwarded to the embedding request.
 * @returns {Promise<number[][]>} Normalized vectors, index-aligned with `contents`.
 * @throws {OwnSearchError} When the API key is missing, or a response holds
 *   no embeddings, a different number of embeddings than inputs, or an
 *   embedding without values.
 */
async function embed(contents, taskType) {
  const config = await loadConfig();
  const vectors = [];
  const debug = process.env.OWNSEARCH_DEBUG_INDEX === "1";
  for (let index = 0; index < contents.length; index += MAX_EMBED_BATCH_SIZE) {
    const batch = contents.slice(index, index + MAX_EMBED_BATCH_SIZE);
    if (debug) {
      // stderr keeps diagnostics out of stdout, which carries the JSON-RPC
      // stream when running as a stdio MCP server.
      console.error("[ownsearch:embed]", "batch", index / MAX_EMBED_BATCH_SIZE + 1, "size", batch.length, "chars", batch.reduce((sum, text) => sum + text.length, 0));
    }
    const response = await getClient().models.embedContent({
      model: config.embeddingModel,
      contents: batch,
      config: {
        taskType,
        outputDimensionality: config.vectorSize
      }
    });
    const embeddings = response.embeddings ?? [];
    if (embeddings.length === 0) {
      throw new OwnSearchError("Gemini returned no embeddings.");
    }
    // Callers pair vectors with inputs by position, so a short or long
    // response would silently attach vectors to the wrong text.
    if (embeddings.length !== batch.length) {
      throw new OwnSearchError(`Gemini returned ${embeddings.length} embeddings for ${batch.length} inputs.`);
    }
    for (const embedding of embeddings) {
      if (!embedding.values?.length) {
        throw new OwnSearchError("Gemini returned an embedding without values.");
      }
      vectors.push(normalizeVector(embedding.values));
    }
  }
  return vectors;
}
|
|
263
|
+
/**
 * Embed texts that will be stored in the index.
 *
 * @param {string[]} contents - Document texts.
 * @returns {Promise<number[][]>} Result of `embed` with task type "RETRIEVAL_DOCUMENT".
 */
async function embedDocuments(contents) {
  const taskType = "RETRIEVAL_DOCUMENT";
  return embed(contents, taskType);
}
|
|
266
|
+
/**
 * Embed a single search query.
 *
 * @param {string} query - Query text.
 * @returns {Promise<number[]>} First vector returned by `embed` for task type "RETRIEVAL_QUERY".
 */
async function embedQuery(query) {
  const vectors = await embed([query], "RETRIEVAL_QUERY");
  return vectors[0];
}
|
|
270
|
+
|
|
271
|
+
// src/qdrant.ts
|
|
272
|
+
import { QdrantClient } from "@qdrant/js-client-rest";
|
|
273
|
+
/**
 * Thin wrapper around a Qdrant client that stores and retrieves text chunks
 * in a single collection, using payload fields (`root_id`, `file_path`, ...)
 * to tell indexed roots and files apart.
 */
var OwnSearchStore = class {
  /**
   * @param {object} client2 - Qdrant client exposing getCollections,
   *   createCollection, getCollection, createPayloadIndex, upsert, scroll,
   *   delete, search and retrieve.
   * @param {string} collectionName - Collection that holds every chunk.
   * @param {number} vectorSize - Expected dimensionality of stored vectors.
   */
  constructor(client2, collectionName, vectorSize) {
    this.client = client2;
    this.collectionName = collectionName;
    this.vectorSize = vectorSize;
  }

  /**
   * Create the collection when missing, otherwise verify its vector size,
   * then request keyword payload indexes on `root_id` and `relative_path`.
   *
   * @throws {Error} When the existing collection has a different vector size.
   */
  async ensureCollection() {
    const collections = await this.client.getCollections();
    const exists = collections.collections.some((collection) => collection.name === this.collectionName);
    if (!exists) {
      await this.client.createCollection(this.collectionName, {
        vectors: {
          size: this.vectorSize,
          distance: "Cosine"
        }
      });
    } else {
      const info = await this.client.getCollection(this.collectionName);
      const vectorConfig = info.config?.params?.vectors;
      const actualSize = vectorConfig && !Array.isArray(vectorConfig) && "size" in vectorConfig ? Number(vectorConfig.size) : void 0;
      if (actualSize && actualSize !== this.vectorSize) {
        throw new Error(
          `Qdrant collection ${this.collectionName} has vector size ${actualSize}, expected ${this.vectorSize}.`
        );
      }
    }
    // allSettled: a failure to create an index is deliberately ignored.
    await Promise.allSettled([
      this.client.createPayloadIndex(this.collectionName, {
        field_name: "root_id",
        field_schema: "keyword"
      }),
      this.client.createPayloadIndex(this.collectionName, {
        field_name: "relative_path",
        field_schema: "keyword"
      })
    ]);
  }

  /**
   * Upsert chunk records with their vectors, 50 points per request.
   *
   * @param {object[]} records - Chunk records (id, rootId, filePath, content, ...).
   * @param {number[][]} vectors - Vectors index-aligned with `records`.
   * @throws {Error} When `records` and `vectors` differ in length.
   */
  async upsertChunks(records, vectors) {
    if (records.length !== vectors.length) {
      // Without this, trailing records would be sent with an undefined vector.
      throw new Error(`Cannot upsert ${records.length} chunks with ${vectors.length} vectors.`);
    }
    const batchSize = 50;
    for (let index = 0; index < records.length; index += batchSize) {
      const batchRecords = records.slice(index, index + batchSize);
      const batchVectors = vectors.slice(index, index + batchSize);
      const points = batchRecords.map((record, batchIndex) => ({
        id: record.id,
        vector: batchVectors[batchIndex],
        payload: {
          root_id: record.rootId,
          root_name: record.rootName,
          root_path: record.rootPath,
          file_path: record.filePath,
          relative_path: record.relativePath,
          file_extension: record.fileExtension,
          chunk_index: record.chunkIndex,
          content: record.content,
          content_hash: record.contentHash,
          file_hash: record.fileHash,
          mtime_ms: record.mtimeMs,
          size_bytes: record.sizeBytes
        }
      }));
      await this.client.upsert(this.collectionName, {
        wait: true,
        points
      });
    }
  }

  /**
   * Fetch every stored chunk of a root (payload only, no vectors), paging
   * through the collection 1024 points at a time.
   *
   * @param {string} rootId - Root whose chunks are listed.
   * @returns {Promise<object[]>} Chunk records rebuilt from the payloads;
   *   missing payload fields become "" or 0.
   */
  async scrollRootChunks(rootId) {
    const chunks = [];
    let offset;
    do {
      const result = await this.client.scroll(this.collectionName, {
        limit: 1024,
        offset,
        with_payload: true,
        with_vector: false,
        filter: {
          must: [{ key: "root_id", match: { value: rootId } }]
        }
      });
      chunks.push(
        ...(result.points ?? []).map((point) => ({
          id: String(point.id),
          rootId: String(point.payload?.root_id ?? ""),
          rootPath: String(point.payload?.root_path ?? ""),
          rootName: String(point.payload?.root_name ?? ""),
          filePath: String(point.payload?.file_path ?? ""),
          relativePath: String(point.payload?.relative_path ?? ""),
          fileExtension: String(point.payload?.file_extension ?? ""),
          chunkIndex: Number(point.payload?.chunk_index ?? 0),
          content: String(point.payload?.content ?? ""),
          contentHash: String(point.payload?.content_hash ?? ""),
          fileHash: String(point.payload?.file_hash ?? ""),
          mtimeMs: Number(point.payload?.mtime_ms ?? 0),
          sizeBytes: Number(point.payload?.size_bytes ?? 0)
        }))
      );
      offset = result.next_page_offset;
    } while (offset !== void 0 && offset !== null);
    return chunks;
  }

  /**
   * Delete every chunk that belongs to a root.
   *
   * @param {string} rootId - Root to purge.
   */
  async deleteRoot(rootId) {
    await this.client.delete(this.collectionName, {
      wait: true,
      filter: {
        must: [{ key: "root_id", match: { value: rootId } }]
      }
    });
  }

  /**
   * Delete the chunks of the given files within a root.
   *
   * Paths are sent in groups of 100 using a `match.any` condition (the same
   * condition form `search` uses for root ids), so removing many files costs
   * a few requests instead of one blocking request per file.
   *
   * @param {string} rootId - Root the files belong to.
   * @param {string[]} filePaths - Values of the `file_path` payload field to remove.
   */
  async deleteFiles(rootId, filePaths) {
    const batchSize = 100;
    for (let index = 0; index < filePaths.length; index += batchSize) {
      await this.client.delete(this.collectionName, {
        wait: true,
        filter: {
          must: [
            { key: "root_id", match: { value: rootId } },
            { key: "file_path", match: { any: filePaths.slice(index, index + batchSize) } }
          ]
        }
      });
    }
  }

  /**
   * Run a vector search, optionally restricted to roots and to relative
   * paths containing a substring (case-insensitive).
   *
   * The path substring is applied client-side after the search, so three
   * times `limit` candidates are requested when it is set; fewer than
   * `limit` hits may still be returned.
   *
   * @param {number[]} vector - Query vector.
   * @param {{rootIds?: string[], pathSubstring?: string}} filters - Optional filters.
   * @param {number} limit - Maximum number of hits to return.
   * @returns {Promise<object[]>} Hits with id, score and chunk payload fields.
   */
  async search(vector, filters, limit) {
    const must = [];
    if (filters.rootIds?.length) {
      must.push({ key: "root_id", match: { any: filters.rootIds } });
    }
    const results = await this.client.search(this.collectionName, {
      vector,
      limit: filters.pathSubstring ? Math.max(limit * 3, limit) : limit,
      with_payload: true,
      filter: must.length ? { must } : void 0
    });
    const hits = results.map((result) => ({
      id: String(result.id),
      score: result.score,
      rootId: String(result.payload?.root_id ?? ""),
      rootName: String(result.payload?.root_name ?? ""),
      filePath: String(result.payload?.file_path ?? ""),
      relativePath: String(result.payload?.relative_path ?? ""),
      chunkIndex: Number(result.payload?.chunk_index ?? 0),
      content: String(result.payload?.content ?? "")
    }));
    if (!filters.pathSubstring) {
      return hits.slice(0, limit);
    }
    const needle = filters.pathSubstring.toLowerCase();
    return hits.filter((hit) => hit.relativePath.toLowerCase().includes(needle)).slice(0, limit);
  }

  /**
   * Retrieve specific chunks by point id.
   *
   * @param {string[]} ids - Point ids; an empty list returns [] without a request.
   * @returns {Promise<object[]>} Chunks rebuilt from the stored payloads.
   */
  async getChunks(ids) {
    if (ids.length === 0) {
      return [];
    }
    const points = await this.client.retrieve(this.collectionName, {
      ids,
      with_payload: true,
      with_vector: false
    });
    return (points ?? []).map((point) => ({
      id: String(point.id),
      rootId: String(point.payload?.root_id ?? ""),
      rootName: String(point.payload?.root_name ?? ""),
      filePath: String(point.payload?.file_path ?? ""),
      relativePath: String(point.payload?.relative_path ?? ""),
      chunkIndex: Number(point.payload?.chunk_index ?? 0),
      content: String(point.payload?.content ?? "")
    }));
  }

  /**
   * Summarize the collection as reported by Qdrant.
   *
   * @returns {Promise<object>} Collection name, status, point counts and vector config.
   */
  async getStatus() {
    const info = await this.client.getCollection(this.collectionName);
    return {
      collection: this.collectionName,
      status: info.status,
      pointsCount: info.points_count,
      indexedVectorsCount: info.indexed_vectors_count,
      vectorConfig: info.config?.params?.vectors ?? null
    };
  }
};
|
|
480
|
+
/**
 * Build a store from the loaded configuration and make sure its collection
 * exists before handing it back.
 *
 * @returns {Promise<OwnSearchStore>} A store whose `ensureCollection()` has completed.
 */
async function createStore() {
  const { qdrantUrl, qdrantCollection, vectorSize } = await loadConfig();
  const qdrant = new QdrantClient({ url: qdrantUrl, checkCompatibility: false });
  const store = new OwnSearchStore(qdrant, qdrantCollection, vectorSize);
  await store.ensureCollection();
  return store;
}
|
|
487
|
+
|
|
488
|
+
// src/indexer.ts
|
|
489
|
+
import fs3 from "fs/promises";
|
|
490
|
+
import path4 from "path";
|
|
491
|
+
|
|
492
|
+
// src/chunking.ts
|
|
493
|
+
/**
 * Split text into overlapping chunks of at most `chunkSize` characters.
 *
 * CRLF line endings are normalized to LF and the text is trimmed first. A
 * chunk ends at the last newline inside its window when that newline lies
 * past the window's midpoint; otherwise it ends at the window edge. Each
 * following chunk starts `chunkOverlap` characters before the previous end
 * and always at least one character past the previous start.
 *
 * @param {string} content - Text to split.
 * @param {number} chunkSize - Maximum chunk length in characters; must be >= 1.
 * @param {number} chunkOverlap - Characters shared between consecutive chunks;
 *   must satisfy 0 <= chunkOverlap < chunkSize.
 * @returns {string[]} Trimmed, non-empty chunks in document order.
 * @throws {RangeError} When the size or overlap is out of range. Such values
 *   previously produced no chunks at all, skipped text, or advanced one
 *   character per chunk.
 */
function chunkText(content, chunkSize, chunkOverlap) {
  if (!Number.isFinite(chunkSize) || chunkSize < 1) {
    throw new RangeError(`chunkSize must be a positive number, received ${chunkSize}.`);
  }
  if (!Number.isFinite(chunkOverlap) || chunkOverlap < 0 || chunkOverlap >= chunkSize) {
    throw new RangeError(`chunkOverlap must be at least 0 and smaller than chunkSize (${chunkSize}), received ${chunkOverlap}.`);
  }
  const normalized = content.replace(/\r\n/g, "\n").trim();
  if (!normalized) {
    return [];
  }
  const chunks = [];
  let start = 0;
  while (start < normalized.length) {
    let end = Math.min(start + chunkSize, normalized.length);
    if (end < normalized.length) {
      // Prefer to cut at a line break, but only if that keeps the chunk
      // longer than half the window.
      const lastBoundary = normalized.lastIndexOf("\n", end);
      if (lastBoundary > start + Math.floor(chunkSize * 0.5)) {
        end = lastBoundary;
      }
    }
    const chunk = normalized.slice(start, end).trim();
    if (chunk) {
      chunks.push(chunk);
    }
    if (end >= normalized.length) {
      break;
    }
    // start + 1 guarantees forward progress even when the overlap reaches
    // back past the current start.
    start = Math.max(end - chunkOverlap, start + 1);
  }
  return chunks;
}
|
|
519
|
+
|
|
520
|
+
// src/files.ts
|
|
521
|
+
import fs2 from "fs/promises";
|
|
522
|
+
import path3 from "path";
|
|
523
|
+
import { PDFParse } from "pdf-parse";
|
|
524
|
+
/**
 * Clean text read from a file or extracted from a PDF.
 *
 * NUL characters are removed, the remaining control characters (other than
 * tab, line feed and carriage return) become spaces, and CRLF becomes LF.
 *
 * @param {string} input - Raw text.
 * @returns {string} Sanitized text.
 */
function sanitizeExtractedText(input) {
  const withoutNuls = input.replace(/\u0000/g, "");
  const withoutControls = withoutNuls.replace(/[\u0001-\u0008\u000B\u000C\u000E-\u001F\u007F]/g, " ");
  return withoutControls.replace(/\r\n/g, "\n");
}
|
|
527
|
+
/**
 * Walk a folder and load every indexable file's text into memory.
 *
 * Skipped: directories listed in `IGNORED_DIRECTORIES`, dot-directories other
 * than `.env` and `.github`, files whose extension is not in
 * `SUPPORTED_TEXT_EXTENSIONS`, files larger than `maxFileBytes`, files that
 * cannot be stat'ed, read or parsed, and files with no non-whitespace text.
 * An unreadable subdirectory is skipped; an unreadable root still throws.
 *
 * Set `OWNSEARCH_DEBUG_INDEX=1` to log each skip decision to stderr.
 *
 * @param {string} rootPath - Folder to walk.
 * @param {number} maxFileBytes - Size limit per file, in bytes.
 * @returns {Promise<Array<{path: string, relativePath: string, extension: string, sizeBytes: number, mtimeMs: number, content: string}>>}
 */
async function collectTextFiles(rootPath, maxFileBytes) {
  const files = [];
  const absoluteRoot = path3.resolve(rootPath);
  const debug = process.env.OWNSEARCH_DEBUG_INDEX === "1";

  // stderr keeps diagnostics out of stdout, which carries the JSON-RPC
  // stream when indexing is triggered through the stdio MCP server.
  function debugLog(...parts) {
    if (debug) {
      console.error("[ownsearch:index]", ...parts);
    }
  }

  // Extract the text of a PDF, always releasing the parser.
  async function parsePdf(filePath) {
    const buffer = await fs2.readFile(filePath);
    const parser = new PDFParse({ data: buffer });
    try {
      const pdfData = await parser.getText();
      return pdfData.text ?? "";
    } finally {
      await parser.destroy();
    }
  }

  async function walk(currentPath) {
    let entries;
    try {
      entries = await fs2.readdir(currentPath, { withFileTypes: true });
    } catch (error) {
      if (currentPath === absoluteRoot) {
        throw error;
      }
      debugLog("skip-dir", currentPath, String(error));
      return;
    }
    for (const entry of entries) {
      const nextPath = path3.join(currentPath, entry.name);
      if (entry.isDirectory()) {
        const hidden = entry.name.startsWith(".") && entry.name !== ".env" && entry.name !== ".github";
        if (hidden || IGNORED_DIRECTORIES.has(entry.name)) {
          continue;
        }
        await walk(nextPath);
        continue;
      }
      const extension = path3.extname(entry.name).toLowerCase();
      if (!SUPPORTED_TEXT_EXTENSIONS.has(extension)) {
        debugLog("skip-extension", nextPath, extension);
        continue;
      }
      let stats;
      try {
        stats = await fs2.stat(nextPath);
      } catch (error) {
        // E.g. the file was removed after readdir, or a dangling symlink.
        debugLog("skip-stat", nextPath, String(error));
        continue;
      }
      if (stats.size > maxFileBytes) {
        debugLog("skip-size", nextPath, stats.size);
        continue;
      }
      let content = "";
      try {
        if (extension === ".pdf") {
          content = await parsePdf(nextPath);
        } else {
          content = await fs2.readFile(nextPath, "utf8");
        }
        content = sanitizeExtractedText(content);
      } catch (error) {
        debugLog("skip-parse", nextPath, String(error));
        continue;
      }
      if (!content || !content.trim()) {
        debugLog("skip-empty", nextPath);
        continue;
      }
      files.push({
        path: nextPath,
        relativePath: path3.relative(absoluteRoot, nextPath),
        extension,
        sizeBytes: stats.size,
        mtimeMs: stats.mtimeMs,
        content
      });
    }
  }

  await walk(absoluteRoot);
  return files;
}
|
|
601
|
+
|
|
602
|
+
// src/indexer.ts
|
|
603
|
+
/**
 * Derive the deterministic point id of a chunk.
 *
 * @param {string} rootId - Id of the root the file belongs to.
 * @param {string} relativePath - File path relative to the root.
 * @param {number} chunkIndex - Position of the chunk within the file.
 * @param {string} fileHash - Hash of the whole file's content.
 * @returns {string} `hashToUuid` of the four values joined with ":".
 */
function buildChunkId(rootId, relativePath, chunkIndex, fileHash) {
  const identity = `${rootId}:${relativePath}:${chunkIndex}:${fileHash}`;
  return hashToUuid(identity);
}
|
|
606
|
+
/**
 * Embed chunk records, isolating chunks the embedding call rejects.
 *
 * All records are embedded in one call first. If that fails, the list is
 * halved and each half retried recursively; a single record that still fails
 * is dropped and counted in `skipped`.
 *
 * @param {object[]} records - Chunk records with a `content` string.
 * @returns {Promise<{records: object[], vectors: number[][], skipped: number}>}
 *   Successfully embedded records with index-aligned vectors, plus the
 *   number of records dropped.
 * @throws {OwnSearchError} When `GEMINI_API_KEY` is missing.
 */
async function embedRecords(records) {
  if (records.length === 0) {
    return { records: [], vectors: [], skipped: 0 };
  }
  // Fail fast on missing credentials. Inside the try below, that error would
  // be retried for every sub-batch and end with every chunk silently skipped.
  getClient();
  try {
    const vectors = await embedDocuments(records.map((record) => record.content));
    return { records, vectors, skipped: 0 };
  } catch (error) {
    if (records.length === 1) {
      if (process.env.OWNSEARCH_DEBUG_INDEX === "1") {
        // stderr: stdout may be carrying the MCP stdio protocol.
        console.error("[ownsearch:embed]", "skip-chunk", records[0].relativePath, String(error));
      }
      return { records: [], vectors: [], skipped: 1 };
    }
    const midpoint = Math.floor(records.length / 2);
    const left = await embedRecords(records.slice(0, midpoint));
    const right = await embedRecords(records.slice(midpoint));
    return {
      records: [...left.records, ...right.records],
      vectors: [...left.vectors, ...right.vectors],
      skipped: left.skipped + right.skipped
    };
  }
}
|
|
631
|
+
/**
 * Incrementally index a folder into the vector store.
 *
 * The folder is registered as a root (or its entry refreshed), its files are
 * collected and chunked, and the result is compared with the chunks already
 * stored for that root. A file is re-embedded when it is new, when its
 * content hash or chunk count changed, or when any stored chunk carries an
 * outdated root name/path. Chunks of changed and vanished files are deleted.
 *
 * Embedding runs before any deletion, so a failure there leaves the existing
 * index untouched.
 *
 * @param {string} rootPath - Folder to index.
 * @param {{name?: string, maxFileBytes?: number}} [options] - Optional root
 *   display name and per-file size limit (defaults to the configured limit).
 * @returns {Promise<{root: object, indexedFiles: number, indexedChunks: number, skippedFiles: number, skippedChunks: number}>}
 *   `indexedFiles` counts every collected file; `indexedChunks` counts chunks
 *   written in this run; `skippedChunks` counts chunks that failed to embed.
 * @throws {OwnSearchError} When `rootPath` is not a readable directory.
 */
async function indexPath(rootPath, options = {}) {
  const absolutePath = path4.resolve(rootPath);
  const stats = await fs3.stat(absolutePath).catch(() => void 0);
  if (!stats?.isDirectory()) {
    throw new OwnSearchError(`Path is not a readable directory: ${absolutePath}`);
  }
  const config = await loadConfig();
  const root = await upsertRoot(absolutePath, options.name);
  const store = await createStore();
  const files = await collectTextFiles(root.path, options.maxFileBytes ?? config.maxFileBytes);
  const existingChunks = await store.scrollRootChunks(root.id);

  const currentPaths = new Set(files.map((file) => file.path));
  const existingByPath = /* @__PURE__ */ new Map();
  for (const chunk of existingChunks) {
    const list = existingByPath.get(chunk.filePath) ?? [];
    list.push(chunk);
    existingByPath.set(chunk.filePath, list);
  }
  // Root name/path are copied into every payload; if any stored chunk is out
  // of date, rewrite them all.
  const refreshAllMetadata = existingChunks.some(
    (chunk) => chunk.rootName !== root.name || chunk.rootPath !== root.path
  );

  const records = [];
  const staleFiles = /* @__PURE__ */ new Set();
  for (const file of files) {
    const fileHash = sha256(file.content);
    const chunks = chunkText(file.content, config.chunkSize, config.chunkOverlap);
    const existing = existingByPath.get(file.path);
    const unchanged = existing && existing[0]?.fileHash === fileHash && existing.length === chunks.length;
    if (!refreshAllMetadata && unchanged) {
      continue;
    }
    if (existing?.length) {
      staleFiles.add(file.path);
    }
    const relativePath = toPosixPath(file.relativePath);
    chunks.forEach((content, chunkIndex) => {
      records.push({
        id: buildChunkId(root.id, relativePath, chunkIndex, fileHash),
        rootId: root.id,
        rootPath: root.path,
        rootName: root.name,
        filePath: file.path,
        relativePath,
        fileExtension: file.extension,
        chunkIndex,
        content,
        contentHash: sha256(content),
        fileHash,
        mtimeMs: file.mtimeMs,
        sizeBytes: file.sizeBytes
      });
    });
  }
  // Files that were indexed before but are no longer collected.
  for (const existingFilePath of existingByPath.keys()) {
    if (!currentPaths.has(existingFilePath)) {
      staleFiles.add(existingFilePath);
    }
  }

  // Embed first: this is the step most likely to fail.
  const embedded = await embedRecords(records);
  // Delete before upserting: deletion is by file path and would otherwise
  // remove the freshly written chunks of a changed file as well.
  if (staleFiles.size > 0) {
    await store.deleteFiles(root.id, Array.from(staleFiles));
  }
  if (embedded.records.length > 0) {
    await store.upsertChunks(embedded.records, embedded.vectors);
  }
  return {
    root,
    indexedFiles: files.length,
    indexedChunks: embedded.records.length,
    skippedFiles: 0,
    skippedChunks: embedded.skipped
  };
}
|
|
714
|
+
|
|
715
|
+
export {
|
|
716
|
+
buildContextBundle,
|
|
717
|
+
getConfigPath,
|
|
718
|
+
loadConfig,
|
|
719
|
+
deleteRootDefinition,
|
|
720
|
+
findRoot,
|
|
721
|
+
listRoots,
|
|
722
|
+
OwnSearchError,
|
|
723
|
+
embedQuery,
|
|
724
|
+
createStore,
|
|
725
|
+
indexPath
|
|
726
|
+
};
|
package/dist/cli.d.ts
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
package/dist/cli.js
ADDED
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import {
|
|
3
|
+
OwnSearchError,
|
|
4
|
+
buildContextBundle,
|
|
5
|
+
createStore,
|
|
6
|
+
deleteRootDefinition,
|
|
7
|
+
embedQuery,
|
|
8
|
+
findRoot,
|
|
9
|
+
getConfigPath,
|
|
10
|
+
indexPath,
|
|
11
|
+
listRoots,
|
|
12
|
+
loadConfig
|
|
13
|
+
} from "./chunk-NLETDGQ5.js";
|
|
14
|
+
|
|
15
|
+
// src/cli.ts
|
|
16
|
+
import "dotenv/config";
|
|
17
|
+
import path from "path";
|
|
18
|
+
import { spawn } from "child_process";
|
|
19
|
+
import { fileURLToPath } from "url";
|
|
20
|
+
import { Command } from "commander";
|
|
21
|
+
|
|
22
|
+
// src/docker.ts
|
|
23
|
+
import { execFile } from "child_process";
|
|
24
|
+
import { promisify } from "util";
|
|
25
|
+
var execFileAsync = promisify(execFile);
|
|
26
|
+
/**
 * Run the `docker` CLI with the given arguments and return its trimmed stdout.
 *
 * @param {string[]} args - Arguments passed verbatim to the `docker` executable.
 * @returns {Promise<string>} Standard output of the command with surrounding whitespace removed.
 * @throws {OwnSearchError} When the `docker` binary cannot be found, or when the
 *   command runs but fails. The underlying error is attached as `cause`.
 */
async function runDocker(args) {
  try {
    const { stdout } = await execFileAsync("docker", args, { windowsHide: true });
    return stdout.trim();
  } catch (error) {
    let wrapped;
    if (error?.code === "ENOENT") {
      // ENOENT: the executable itself could not be located on PATH.
      wrapped = new OwnSearchError("Docker is required for Qdrant setup. Install Docker and ensure `docker` is on PATH.");
    } else {
      // Docker was found but the command failed (daemon down, port in use, ...).
      // Surface docker's own stderr instead of a misleading "install Docker" hint.
      const stderrText = String(error?.stderr ?? "").trim();
      const fallbackText = error instanceof Error ? error.message : String(error);
      wrapped = new OwnSearchError(`Docker command failed (docker ${args.join(" ")}): ${stderrText || fallbackText}`);
    }
    wrapped.cause = error;
    throw wrapped;
  }
}
|
|
34
|
+
/**
 * Make sure the Qdrant container named in the config exists and is running.
 *
 * The container is looked up by exact name. A stopped container is started;
 * a missing one is created from the `qdrant/qdrant:latest` image with ports
 * 6333 and 6334 published and the configured volume mounted at
 * `/qdrant/storage`.
 *
 * @returns {Promise<{started: boolean, url: *}>} `started` is false only when
 *   the container was already running; `url` is `config.qdrantUrl`.
 */
async function ensureQdrantDocker() {
  const config = await loadConfig();
  const name = config.qdrantContainerName;
  const volume = config.qdrantVolumeName;

  const listedName = await runDocker(["ps", "-a", "--filter", `name=^/${name}$`, "--format", "{{.Names}}"]);

  if (listedName !== name) {
    // No container with this name yet: create and start a fresh one.
    const createArgs = ["run", "-d", "--name", name];
    createArgs.push("-p", "6333:6333");
    createArgs.push("-p", "6334:6334");
    createArgs.push("-v", `${volume}:/qdrant/storage`);
    createArgs.push("qdrant/qdrant:latest");
    await runDocker(createArgs);
    return { started: true, url: config.qdrantUrl };
  }

  const runningFlag = await runDocker(["inspect", "-f", "{{.State.Running}}", name]);
  const alreadyRunning = runningFlag === "true";
  if (!alreadyRunning) {
    await runDocker(["start", name]);
  }
  return { started: !alreadyRunning, url: config.qdrantUrl };
}
|
|
62
|
+
|
|
63
|
+
// src/cli.ts
|
|
64
|
+
var program = new Command();
|
|
65
|
+
/**
 * Guard for commands that need Gemini embeddings.
 *
 * @throws {OwnSearchError} When `GEMINI_API_KEY` is unset or empty in the environment.
 */
function requireGeminiKey() {
  const apiKey = process.env.GEMINI_API_KEY;
  if (apiKey) {
    return;
  }
  throw new OwnSearchError("Set GEMINI_API_KEY before running OwnSearch.");
}
|
|
70
|
+
program.name("ownsearch").description("Gemini-powered local search MCP server backed by Qdrant.").version("0.1.0");
|
|
71
|
+
// `setup`: load the config, make sure the Qdrant container is up, print a JSON
// summary, and remind the user when GEMINI_API_KEY is missing.
program
  .command("setup")
  .description("Create config and start a local Qdrant Docker container.")
  .action(async () => {
    const config = await loadConfig();
    const { started } = await ensureQdrantDocker();
    const summary = {
      configPath: getConfigPath(),
      qdrantUrl: config.qdrantUrl,
      qdrantStarted: started
    };
    console.log(JSON.stringify(summary, null, 2));
    if (process.env.GEMINI_API_KEY) {
      return;
    }
    console.log("GEMINI_API_KEY is not set. Indexing and search will require it later.");
  });
|
|
83
|
+
// `index <folder>`: index a folder via `indexPath` and print its result as JSON.
// `--max-file-bytes` is parsed with Number(), which yields NaN for non-numeric
// input, so the value is validated here before it is handed to `indexPath`.
program
  .command("index")
  .argument("<folder>", "Folder path to index")
  .option("-n, --name <name>", "Display name for the indexed root")
  .option("--max-file-bytes <n>", "Override the file size limit for this run", (value) => Number(value))
  .description("Index a local folder into Qdrant using Gemini embeddings.")
  .action(async (folder, options) => {
    requireGeminiKey();
    const { name, maxFileBytes } = options;
    // The option is optional; only reject it when it was supplied and is unusable.
    if (maxFileBytes !== undefined && !(Number.isFinite(maxFileBytes) && maxFileBytes >= 0)) {
      throw new OwnSearchError("`--max-file-bytes` must be a non-negative number.");
    }
    const result = await indexPath(folder, { name, maxFileBytes });
    console.log(JSON.stringify(result, null, 2));
  });
|
|
91
|
+
// `search <query>`: embed the query, search the store, and print `{ query, hits }`.
// The limit is clamped to 1..50. A non-numeric `--limit` parses to NaN, which
// Math.max/Math.min would pass straight through, so it is rejected up front;
// fractional values are truncated to an integer result count.
program
  .command("search")
  .argument("<query>", "Natural language query")
  .option("--root-id <rootId...>", "Restrict search to one or more root IDs (repeatable)")
  .option("--limit <n>", "Max results (default 5)", (value) => Number(value), 5)
  .option("--path <substr>", "Filter results to files whose relative path contains this substring")
  .description("Embed a query with Gemini and search the local Qdrant store.")
  .action(async (query, options) => {
    requireGeminiKey();
    const requestedLimit = options.limit ?? 5;
    if (!Number.isFinite(requestedLimit)) {
      throw new OwnSearchError("`--limit` must be a finite number.");
    }
    const limit = Math.max(1, Math.min(Math.trunc(requestedLimit), 50));
    const store = await createStore();
    const vector = await embedQuery(query);
    const filters = {
      rootIds: options.rootId,
      pathSubstring: options.path
    };
    const hits = await store.search(vector, filters, limit);
    console.log(JSON.stringify({ query, hits }, null, 2));
  });
|
|
107
|
+
// `search-context <query>`: search the store and print the bundle produced by
// `buildContextBundle`. The hit limit is clamped to 1..20 and the character
// budget has a floor of 500. Both options are parsed with Number(), so NaN
// (from non-numeric input) is rejected before any work is done.
program
  .command("search-context")
  .argument("<query>", "Natural language query")
  .option("--root-id <rootId...>", "Restrict search to one or more root IDs (repeatable)")
  .option("--limit <n>", "Max search hits to consider (default 8)", (value) => Number(value), 8)
  .option("--max-chars <n>", "Max context characters to return (default 12000)", (value) => Number(value), 12e3)
  .option("--path <substr>", "Filter results to files whose relative path contains this substring")
  .description("Search the local Qdrant store and return a bundled context payload for agent use.")
  .action(async (query, options) => {
    requireGeminiKey();
    const requestedLimit = options.limit ?? 8;
    if (!Number.isFinite(requestedLimit)) {
      throw new OwnSearchError("`--limit` must be a finite number.");
    }
    const requestedMaxChars = options.maxChars ?? 12e3;
    if (!Number.isFinite(requestedMaxChars)) {
      throw new OwnSearchError("`--max-chars` must be a finite number.");
    }
    const limit = Math.max(1, Math.min(Math.trunc(requestedLimit), 20));
    const maxChars = Math.max(500, requestedMaxChars);
    const store = await createStore();
    const vector = await embedQuery(query);
    const filters = {
      rootIds: options.rootId,
      pathSubstring: options.path
    };
    const hits = await store.search(vector, filters, limit);
    console.log(JSON.stringify(buildContextBundle(query, hits, maxChars), null, 2));
  });
|
|
123
|
+
// `list-roots`: print the roots returned by `listRoots()` as `{ roots: [...] }`.
program
  .command("list-roots")
  .description("List indexed roots registered in local config.")
  .action(async () => {
    const roots = await listRoots();
    const payload = { roots };
    console.log(JSON.stringify(payload, null, 2));
  });
|
|
126
|
+
// `delete-root <rootId>`: remove a root's data from the store first, then drop
// its definition, and print the deleted root. Unknown ids are an error.
program
  .command("delete-root")
  .argument("<rootId>", "Root identifier to delete")
  .description("Delete one indexed root from local config and Qdrant.")
  .action(async (rootId) => {
    const root = await findRoot(rootId);
    if (root) {
      const store = await createStore();
      await store.deleteRoot(root.id);
      await deleteRootDefinition(root.id);
      const payload = { deleted: true, root };
      console.log(JSON.stringify(payload, null, 2));
      return;
    }
    throw new OwnSearchError(`Unknown root: ${rootId}`);
  });
|
|
136
|
+
// `store-status`: print whatever `store.getStatus()` reports, as JSON.
program
  .command("store-status")
  .description("Show Qdrant collection status for this package.")
  .action(async () => {
    const store = await createStore();
    const status = await store.getStatus();
    console.log(JSON.stringify(status, null, 2));
  });
|
|
140
|
+
// `doctor`: print a JSON report of the local configuration and whether the
// store answers a status request. The reachability probe is best-effort: a
// failure never aborts the command, but its message is included in the report
// as `qdrantError` so the user can see why the store is unreachable.
program
  .command("doctor")
  .description("Check local prerequisites and package configuration.")
  .action(async () => {
    const config = await loadConfig();
    const roots = await listRoots();
    let qdrantReachable = false;
    let qdrantError;
    try {
      const store = await createStore();
      await store.getStatus();
      qdrantReachable = true;
    } catch (error) {
      qdrantError = error instanceof Error ? error.message : String(error);
    }
    const report = {
      configPath: getConfigPath(),
      geminiApiKeyPresent: Boolean(process.env.GEMINI_API_KEY),
      qdrantUrl: config.qdrantUrl,
      qdrantReachable,
      collection: config.qdrantCollection,
      embeddingModel: config.embeddingModel,
      vectorSize: config.vectorSize,
      chunkSize: config.chunkSize,
      chunkOverlap: config.chunkOverlap,
      maxFileBytes: config.maxFileBytes,
      rootCount: roots.length
    };
    // Only present on failure, so the successful report keeps its original shape.
    if (qdrantError !== undefined) {
      report.qdrantError = qdrantError;
    }
    console.log(JSON.stringify(report, null, 2));
  });
|
|
165
|
+
// `serve-mcp`: run `mcp/server.js` (resolved next to this file) in a child Node
// process that shares this process's stdio and environment, and mirror its
// outcome in this process's exit code.
program
  .command("serve-mcp")
  .description("Start the stdio MCP server.")
  .action(async () => {
    const currentFilePath = fileURLToPath(import.meta.url);
    const serverPath = path.join(path.dirname(currentFilePath), "mcp", "server.js");
    const child = spawn(process.execPath, [serverPath], {
      stdio: "inherit",
      env: process.env
    });
    await new Promise((resolve, reject) => {
      // Without an `error` listener a failed spawn is an unhandled 'error' event.
      // Rejecting routes it to the CLI's top-level failure handler instead.
      child.once("error", reject);
      child.once("exit", (code, signal) => {
        // `code` is null when the child was terminated by a signal; report that
        // as a failure rather than as a clean exit.
        process.exitCode = code ?? (signal ? 1 : 0);
        resolve();
      });
    });
  });
|
|
176
|
+
// `print-agent-config <agent>`: print the MCP launch snippet for a supported
// agent. All supported agents currently receive the same snippet.
program
  .command("print-agent-config")
  .argument("<agent>", "codex | claude-desktop | cursor")
  .description("Print an MCP config snippet for a supported agent.")
  .action(async (agent) => {
    const supportedAgents = ["codex", "claude-desktop", "cursor"];
    if (!supportedAgents.includes(agent)) {
      throw new OwnSearchError(`Unsupported agent: ${agent}`);
    }
    const snippet = {
      ownsearch: {
        command: "npx",
        args: ["-y", "ownsearch", "serve-mcp"],
        // Literal placeholder text, not a template: it is printed as-is.
        env: { GEMINI_API_KEY: "${GEMINI_API_KEY}" }
      }
    };
    console.log(JSON.stringify(snippet, null, 2));
  });
|
|
194
|
+
// Entry point: parse argv and run the matching command. Any rejection is
// reported on stderr as a plain message and turns the exit code into 1.
program.parseAsync(process.argv).catch((failure) => {
  let text;
  if (failure instanceof Error) {
    text = failure.message;
  } else {
    text = String(failure);
  }
  console.error(text);
  process.exitCode = 1;
});
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
import {
|
|
3
|
+
OwnSearchError,
|
|
4
|
+
buildContextBundle,
|
|
5
|
+
createStore,
|
|
6
|
+
deleteRootDefinition,
|
|
7
|
+
embedQuery,
|
|
8
|
+
findRoot,
|
|
9
|
+
indexPath,
|
|
10
|
+
loadConfig
|
|
11
|
+
} from "../chunk-NLETDGQ5.js";
|
|
12
|
+
|
|
13
|
+
// src/mcp/server.ts
|
|
14
|
+
import "dotenv/config";
|
|
15
|
+
import { Server } from "@modelcontextprotocol/sdk/server/index.js";
|
|
16
|
+
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
|
|
17
|
+
import { CallToolRequestSchema, ListToolsRequestSchema } from "@modelcontextprotocol/sdk/types.js";
|
|
18
|
+
/**
 * Wrap a value as a tool response holding a single text item.
 *
 * @param {*} result - Value to serialize with 2-space indented JSON.
 * @returns {{content: Array<{type: string, text: *}>}} Response whose only
 *   content item carries the serialized value.
 */
function asText(result) {
  const serialized = JSON.stringify(result, null, 2);
  const textItem = { type: "text", text: serialized };
  return { content: [textItem] };
}
|
|
28
|
+
/**
 * Wrap a thrown value as a tool response flagged with `isError: true`.
 *
 * @param {*} error - The caught value. `Error` instances contribute their
 *   `message`; anything else is converted with `String()`.
 * @returns {{isError: boolean, content: Array<{type: string, text: string}>}}
 *   Response whose only content item carries the message text.
 */
function asError(error) {
  let text;
  if (error instanceof Error) {
    text = error.message;
  } else {
    text = String(error);
  }
  const textItem = { type: "text", text };
  return { isError: true, content: [textItem] };
}
|
|
40
|
+
// The MCP server instance: identifies itself as ownsearch 0.1.0 and declares
// the `tools` capability with no extra options.
var server = new Server(
  { name: "ownsearch", version: "0.1.0" },
  { capabilities: { tools: {} } }
);
|
|
51
|
+
// Tool catalogue: answers the list-tools request with the name, description and
// JSON input schema of every tool this server handles. The schema objects are
// rebuilt on each request by the small factories below.
server.setRequestHandler(ListToolsRequestSchema, async () => {
  const text = (description) => ({ type: "string", description });
  const number = (description) => ({ type: "number", description });
  const textList = (description) => ({ type: "array", items: { type: "string" }, description });
  const tool = (name, description, properties, required) => {
    const inputSchema = { type: "object", properties };
    // Tools without arguments carry no `required` key at all.
    if (required !== undefined) {
      inputSchema.required = required;
    }
    return { name, description, inputSchema };
  };

  return {
    tools: [
      tool(
        "index_path",
        "Register a local folder and sync its Gemini embedding index into Qdrant.",
        {
          path: text("Absolute or relative folder path to index."),
          name: text("Optional display name for this indexed root.")
        },
        ["path"]
      ),
      tool(
        "search",
        "Semantic search over one root or the full local Qdrant store.",
        {
          query: text("Natural language search query."),
          rootIds: textList("Optional list of root IDs to restrict search."),
          limit: number("Maximum result count. Default 5."),
          pathSubstring: text("Optional file path substring filter.")
        },
        ["query"]
      ),
      tool(
        "search_context",
        "Search and return a bundled context payload with top chunks for direct agent grounding.",
        {
          query: text("Natural language search query."),
          rootIds: textList("Optional list of root IDs to restrict search."),
          limit: number("Maximum search hits to consider. Default 8."),
          maxChars: number("Maximum total characters of bundled context. Default 12000."),
          pathSubstring: text("Optional file path substring filter.")
        },
        ["query"]
      ),
      tool(
        "get_chunks",
        "Fetch exact indexed chunks by id after a search step.",
        {
          ids: textList("Chunk ids returned by search.")
        },
        ["ids"]
      ),
      tool("list_roots", "List approved indexed roots.", {}),
      tool(
        "delete_root",
        "Delete one indexed root from config and vector storage.",
        {
          rootId: text("Root identifier returned by list_roots.")
        },
        ["rootId"]
      ),
      tool("store_status", "Inspect Qdrant collection status for the local index.", {})
    ]
  };
});
|
|
146
|
+
/**
 * Turn an optional numeric tool argument into an integer within [min, max].
 *
 * Tool arguments come from the client unvalidated. A non-numeric value would
 * become NaN inside Math.max/Math.min and be passed on as the limit, so
 * anything that is not a finite number (or a numeric string) is rejected.
 *
 * @param {*} value - Raw argument; `undefined`/`null` selects `fallback`.
 * @param {number} fallback - Value used when the argument is absent.
 * @param {number} min - Inclusive lower bound.
 * @param {number} max - Inclusive upper bound.
 * @param {string} label - Argument name used in the error message.
 * @returns {number} The truncated, clamped integer.
 * @throws {OwnSearchError} When the argument is present but not a finite number.
 */
function toBoundedInteger(value, fallback, min, max, label) {
  let numeric = fallback;
  if (value !== undefined && value !== null) {
    numeric = typeof value === "string" && value.trim() !== "" ? Number(value) : value;
    if (typeof numeric !== "number" || !Number.isFinite(numeric)) {
      throw new OwnSearchError(`\`${label}\` must be a finite number.`);
    }
  }
  return Math.max(min, Math.min(Math.trunc(numeric), max));
}

/**
 * Shared search step for the `search` and `search_context` tools.
 *
 * @param {object} args - Tool arguments; `query` must already be validated.
 * @param {number} defaultLimit - Hit limit used when `args.limit` is absent.
 * @returns {Promise<*>} Whatever `store.search` resolves to.
 */
async function searchWithArgs(args, defaultLimit) {
  // Validate before embedding so a bad limit fails without an embedding call.
  const limit = toBoundedInteger(args.limit, defaultLimit, 1, 20, "limit");
  const vector = await embedQuery(args.query);
  const store = await createStore();
  const filters = {
    rootIds: args.rootIds,
    pathSubstring: args.pathSubstring
  };
  return store.search(vector, filters, limit);
}

// Tool dispatcher: routes a call-tool request to the matching operation. Every
// failure, including an unknown tool name, is returned as an error response
// via `asError` rather than thrown.
server.setRequestHandler(CallToolRequestSchema, async (request) => {
  try {
    const args = request.params.arguments;
    switch (request.params.name) {
      case "index_path": {
        if (!args?.path) {
          throw new OwnSearchError("`path` is required.");
        }
        const result = await indexPath(args.path, { name: args.name });
        return asText(result);
      }
      case "search": {
        if (!args?.query) {
          throw new OwnSearchError("`query` is required.");
        }
        const hits = await searchWithArgs(args, 5);
        return asText({
          query: args.query,
          hits
        });
      }
      case "search_context": {
        if (!args?.query) {
          throw new OwnSearchError("`query` is required.");
        }
        // Character budget: at least 500, no upper bound beyond integer safety.
        const maxChars = toBoundedInteger(args.maxChars, 12e3, 500, Number.MAX_SAFE_INTEGER, "maxChars");
        const hits = await searchWithArgs(args, 8);
        return asText(buildContextBundle(args.query, hits, maxChars));
      }
      case "get_chunks": {
        if (!args?.ids?.length) {
          throw new OwnSearchError("`ids` is required.");
        }
        // A non-empty string also has a truthy length; only arrays are valid.
        if (!Array.isArray(args.ids)) {
          throw new OwnSearchError("`ids` must be an array of chunk ids.");
        }
        const store = await createStore();
        const chunks = await store.getChunks(args.ids);
        return asText({ chunks });
      }
      case "list_roots": {
        const config = await loadConfig();
        return asText({ roots: config.roots });
      }
      case "delete_root": {
        if (!args?.rootId) {
          throw new OwnSearchError("`rootId` is required.");
        }
        const root = await findRoot(args.rootId);
        if (!root) {
          throw new OwnSearchError(`Unknown root: ${args.rootId}`);
        }
        const store = await createStore();
        // Stored data is removed before the root definition.
        await store.deleteRoot(root.id);
        await deleteRootDefinition(root.id);
        return asText({
          deleted: true,
          root
        });
      }
      case "store_status": {
        const store = await createStore();
        return asText(await store.getStatus());
      }
      default:
        throw new OwnSearchError(`Unknown tool: ${request.params.name}`);
    }
  } catch (error) {
    return asError(error);
  }
});
|
|
235
|
+
/**
 * Connect the MCP server to a stdio transport.
 *
 * @returns {Promise<void>} Settles when `server.connect` settles.
 */
async function main() {
  await server.connect(new StdioServerTransport());
}

// Bootstrap: a failure is logged to stderr and sets the exit code to 1.
main().catch((failure) => {
  console.error(failure);
  process.exitCode = 1;
});
|
package/package.json
ADDED
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "ownsearch",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Text-first local document search MCP server backed by Gemini embeddings and Qdrant.",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"bin": {
|
|
7
|
+
"ownsearch": "./dist/cli.js"
|
|
8
|
+
},
|
|
9
|
+
"files": [
|
|
10
|
+
"dist",
|
|
11
|
+
"README.md"
|
|
12
|
+
],
|
|
13
|
+
"scripts": {
|
|
14
|
+
"build": "tsup src/cli.ts src/mcp/server.ts --format esm --dts --clean --external pdf-parse",
|
|
15
|
+
"dev": "tsx src/cli.ts",
|
|
16
|
+
"prepare": "npm run build",
|
|
17
|
+
"prepublishOnly": "npm run typecheck && npm run build",
|
|
18
|
+
"serve-mcp": "tsx src/mcp/server.ts",
|
|
19
|
+
"typecheck": "tsc --noEmit"
|
|
20
|
+
},
|
|
21
|
+
"keywords": [
|
|
22
|
+
"mcp",
|
|
23
|
+
"search",
|
|
24
|
+
"embeddings",
|
|
25
|
+
"gemini",
|
|
26
|
+
"qdrant",
|
|
27
|
+
"rag"
|
|
28
|
+
],
|
|
29
|
+
"author": "OwnSearch contributors",
|
|
30
|
+
"license": "MIT",
|
|
31
|
+
"repository": {
|
|
32
|
+
"type": "git",
|
|
33
|
+
"url": "git+https://github.com/Grumppie/OwnSearch.git"
|
|
34
|
+
},
|
|
35
|
+
"homepage": "https://github.com/Grumppie/OwnSearch#readme",
|
|
36
|
+
"bugs": {
|
|
37
|
+
"url": "https://github.com/Grumppie/OwnSearch/issues"
|
|
38
|
+
},
|
|
39
|
+
"publishConfig": {
|
|
40
|
+
"access": "public"
|
|
41
|
+
},
|
|
42
|
+
"engines": {
|
|
43
|
+
"node": ">=20"
|
|
44
|
+
},
|
|
45
|
+
"dependencies": {
|
|
46
|
+
"@google/genai": "^1.46.0",
|
|
47
|
+
"@modelcontextprotocol/sdk": "^1.27.1",
|
|
48
|
+
"@qdrant/js-client-rest": "^1.17.0",
|
|
49
|
+
"commander": "^14.0.1",
|
|
50
|
+
"dotenv": "^17.3.1",
|
|
51
|
+
"pdf-parse": "^2.4.5",
|
|
52
|
+
"zod": "^3.25.76"
|
|
53
|
+
},
|
|
54
|
+
"devDependencies": {
|
|
55
|
+
"@types/node": "^24.6.0",
|
|
56
|
+
"@types/pdf-parse": "^1.1.5",
|
|
57
|
+
"tsup": "^8.5.0",
|
|
58
|
+
"tsx": "^4.20.6",
|
|
59
|
+
"typescript": "^5.9.3"
|
|
60
|
+
}
|
|
61
|
+
}
|