@tideshift/sextant 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +25 -0
- package/LICENSE +21 -0
- package/README.md +123 -0
- package/package.json +53 -0
- package/src/config.ts +50 -0
- package/src/ignore-files.ts +31 -0
- package/src/index.ts +69 -0
- package/src/indexer/chunker.ts +245 -0
- package/src/indexer/embedder.ts +79 -0
- package/src/indexer/freshness.ts +74 -0
- package/src/indexer/pipeline.ts +161 -0
- package/src/indexer/state.ts +70 -0
- package/src/indexer/types.ts +34 -0
- package/src/scripts/diag.ts +154 -0
- package/src/server.ts +98 -0
- package/src/store/metadata-db.ts +109 -0
- package/src/store/orama-store.ts +172 -0
- package/src/store/persistence.ts +99 -0
- package/src/tools/get.ts +22 -0
- package/src/tools/list.ts +31 -0
- package/src/tools/reindex.ts +29 -0
- package/src/tools/search.ts +93 -0
- package/src/tools/status.ts +53 -0
|
@@ -0,0 +1,109 @@
|
|
|
1
|
+
import { Database } from 'bun:sqlite';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
import { config } from '../config.ts';
|
|
4
|
+
import type { IndexedFile } from '../indexer/types.ts';
|
|
5
|
+
|
|
6
|
+
// Module-level handle to the metadata database: assigned by initMetadataDb(),
// reset to null by closeMetadataDb(), and read through getMetadataDb().
let db: Database | null = null;
|
|
7
|
+
|
|
8
|
+
/**
 * Opens `metadata.db` under `config.dataPath`, enables WAL journaling, ensures
 * the `indexed_files` table exists and applies the `chunkIds` column migration.
 *
 * Calling it again closes the previously opened connection first so the old
 * handle is not orphaned. If any setup statement fails, the freshly opened
 * handle is closed before the error is rethrown.
 *
 * @returns The open database handle (also stored as the module-level handle).
 * @throws Whatever the SQLite driver throws while opening or running setup.
 */
export function initMetadataDb(): Database {
  // A second init would otherwise overwrite `db` and leak the first connection.
  if (db) {
    db.close();
    db = null;
  }

  const dbPath = path.join(config.dataPath, 'metadata.db');
  const handle = new Database(dbPath);

  try {
    handle.run('PRAGMA journal_mode = WAL');
    handle.run(`
      CREATE TABLE IF NOT EXISTS indexed_files (
        filePath TEXT PRIMARY KEY,
        lastModified INTEGER NOT NULL,
        chunkCount INTEGER NOT NULL,
        title TEXT,
        category TEXT,
        indexedAt INTEGER NOT NULL
      )
    `);

    // Migration: add chunkIds column for deterministic chunk removal
    const columns = handle.query('PRAGMA table_info(indexed_files)').all() as { name: string }[];
    const hasChunkIds = columns.some((column) => column.name === 'chunkIds');
    if (!hasChunkIds) {
      handle.run('ALTER TABLE indexed_files ADD COLUMN chunkIds TEXT');
    }
  } catch (err) {
    // Do not publish (or leak) a half-initialised connection.
    handle.close();
    throw err;
  }

  db = handle;
  return handle;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Returns the module-level metadata database handle.
 *
 * @throws Error when no handle is currently stored.
 */
export function getMetadataDb(): Database {
  if (db) {
    return db;
  }
  throw new Error('Metadata DB not initialized. Call initMetadataDb() first.');
}
|
|
36
|
+
|
|
37
|
+
/**
 * Inserts or replaces the `indexed_files` row for `filePath`.
 *
 * `indexedAt` is stamped with the current time. `chunkIds`, when provided, is
 * stored as a JSON string; otherwise the column is written as NULL.
 */
export function upsertFile(
  filePath: string,
  lastModified: number,
  chunkCount: number,
  title: string | null,
  category: string | null,
  chunkIds?: string[]
): void {
  const statement = getMetadataDb().query(
    `INSERT OR REPLACE INTO indexed_files (filePath, lastModified, chunkCount, title, category, indexedAt, chunkIds)
     VALUES ($filePath, $lastModified, $chunkCount, $title, $category, $indexedAt, $chunkIds)`
  );

  const bindings = {
    $filePath: filePath,
    $lastModified: lastModified,
    $chunkCount: chunkCount,
    $title: title,
    $category: category,
    $indexedAt: Date.now(),
    $chunkIds: chunkIds ? JSON.stringify(chunkIds) : null,
  };

  statement.run(bindings);
}
|
|
59
|
+
|
|
60
|
+
/**
 * Reads the chunk ids recorded for `filePath`.
 *
 * @returns The stored ids, or `null` when the file has no row, the column is
 *   empty, the stored text is not valid JSON, or the decoded value is not an
 *   array of strings.
 */
export function getChunkIds(filePath: string): string[] | null {
  const raw = getFile(filePath)?.chunkIds;
  if (!raw) return null;

  let decoded: unknown;
  try {
    decoded = JSON.parse(raw);
  } catch {
    return null;
  }

  // JSON.parse can yield any JSON value; only hand back what the signature promises.
  const isStringArray = Array.isArray(decoded) && decoded.every((item) => typeof item === 'string');
  return isStringArray ? (decoded as string[]) : null;
}
|
|
69
|
+
|
|
70
|
+
/**
 * Looks up the `indexed_files` row whose `filePath` equals the argument.
 *
 * @returns The result of the driver's `get()` call, cast to `IndexedFile | null`.
 */
export function getFile(filePath: string): IndexedFile | null {
  const lookup = getMetadataDb().query('SELECT * FROM indexed_files WHERE filePath = $filePath');
  const row = lookup.get({ $filePath: filePath });
  return row as IndexedFile | null;
}
|
|
76
|
+
|
|
77
|
+
/** Deletes the `indexed_files` row for `filePath`. */
export function removeFile(filePath: string): void {
  const deletion = getMetadataDb().query('DELETE FROM indexed_files WHERE filePath = $filePath');
  deletion.run({ $filePath: filePath });
}
|
|
83
|
+
|
|
84
|
+
/**
 * Lists indexed files ordered by `filePath`.
 *
 * @param category When truthy, only rows with this exact category are returned;
 *   when omitted (or an empty string) every row is returned.
 */
export function listFiles(category?: string): IndexedFile[] {
  const conn = getMetadataDb();

  if (!category) {
    return conn.query('SELECT * FROM indexed_files ORDER BY filePath').all() as IndexedFile[];
  }

  const filtered = conn.query('SELECT * FROM indexed_files WHERE category = $category ORDER BY filePath');
  return filtered.all({ $category: category }) as IndexedFile[];
}
|
|
93
|
+
|
|
94
|
+
/** Returns every row of `indexed_files`, ordered by `filePath`. */
export function getAllFiles(): IndexedFile[] {
  const everything = getMetadataDb().query('SELECT * FROM indexed_files ORDER BY filePath');
  return everything.all() as IndexedFile[];
}
|
|
98
|
+
|
|
99
|
+
/** Deletes every row from `indexed_files` (the table itself is kept). */
export function clearAll(): void {
  getMetadataDb().run('DELETE FROM indexed_files');
}
|
|
103
|
+
|
|
104
|
+
/** Closes the metadata database if one is open and clears the module-level handle; otherwise does nothing. */
export function closeMetadataDb(): void {
  if (!db) return;
  db.close();
  db = null;
}
|
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
import { create, insert, insertMultiple, remove, search, save, load } from '@orama/orama';
|
|
2
|
+
import type { Orama, Results, SearchParams } from '@orama/orama';
|
|
3
|
+
import { config } from '../config.ts';
|
|
4
|
+
import type { DocChunk } from '../indexer/types.ts';
|
|
5
|
+
|
|
6
|
+
/**
 * Orama schema for one indexed document chunk.
 *
 * The width of the `embedding` vector is taken from `config.embeddingDims`
 * when this module is evaluated.
 */
const SCHEMA = {
  id: 'string' as const,
  filePath: 'string' as const,
  fileName: 'string' as const,
  category: 'string' as const,
  headingSlug: 'string' as const,
  content: 'string' as const,
  chunkIndex: 'number' as const,
  lastModified: 'number' as const,
  embedding: `vector[${config.embeddingDims}]` as const,
};
|
|
17
|
+
|
|
18
|
+
/** Orama instance type specialised to this module's SCHEMA. */
export type OramaDB = Orama<typeof SCHEMA>;
|
|
19
|
+
|
|
20
|
+
// Module-level Orama instance: assigned by initStore(), setStore() and
// loadIndex(), and read through getStore().
let db: OramaDB | null = null;
|
|
21
|
+
|
|
22
|
+
/** Exposes the schema object used to create the Orama store. */
export function getSchema() {
  const schema = SCHEMA;
  return schema;
}
|
|
25
|
+
|
|
26
|
+
/**
 * Creates a new, empty Orama instance from SCHEMA and installs it as the
 * module-level store, replacing whatever was there.
 *
 * @returns The newly created instance.
 */
export async function initStore(): Promise<OramaDB> {
  const fresh = (await create({ schema: SCHEMA })) as OramaDB;
  db = fresh;
  return fresh;
}
|
|
30
|
+
|
|
31
|
+
/**
 * Returns the module-level Orama instance.
 *
 * @throws Error when no instance is currently stored.
 */
export function getStore(): OramaDB {
  if (db) {
    return db;
  }
  throw new Error('Orama store not initialized. Call initStore() first.');
}
|
|
35
|
+
|
|
36
|
+
/** Replaces the module-level Orama instance with `newDb`. */
export function setStore(newDb: OramaDB): void {
  const replacement = newDb;
  db = replacement;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Inserts chunks into the Orama store, pairing `chunks[i]` with `embeddings[i]`.
 *
 * @param chunks Chunks to index.
 * @param embeddings One embedding per chunk, in the same order.
 * @throws Error when the store is not initialised, when the two arrays differ
 *   in length, or when an embedding slot is missing. Previously a mismatch was
 *   hidden by a non-null assertion and only surfaced (if at all) inside Orama.
 */
export async function insertChunks(chunks: DocChunk[], embeddings: number[][]): Promise<void> {
  const store = getStore();

  if (chunks.length !== embeddings.length) {
    throw new Error(`insertChunks: received ${chunks.length} chunks but ${embeddings.length} embeddings`);
  }

  const docs = chunks.map((chunk, i) => {
    const embedding = embeddings[i];
    if (embedding === undefined) {
      // Covers sparse arrays / holes that the length check cannot detect.
      throw new Error(`insertChunks: missing embedding for chunk ${chunk.id}`);
    }
    return {
      id: chunk.id,
      filePath: chunk.filePath,
      fileName: chunk.fileName,
      category: chunk.category,
      headingSlug: chunk.headingSlug,
      content: chunk.content,
      chunkIndex: chunk.chunkIndex,
      lastModified: chunk.lastModified,
      embedding,
    };
  });

  await insertMultiple(store, docs);
}
|
|
56
|
+
|
|
57
|
+
/**
 * Removes the documents with the given ids from the Orama store, one at a time.
 *
 * Removal is best-effort: an id that throws is skipped. Only calls for which
 * `remove` reports success are counted, so ids that are not in the store no
 * longer inflate the result.
 *
 * @returns Number of documents actually removed.
 */
export async function removeByIds(ids: string[]): Promise<number> {
  const store = getStore();
  let removed = 0;

  for (const id of ids) {
    try {
      // `remove` resolves to false when nothing was deleted for this id.
      if (await remove(store, id)) {
        removed++;
      }
    } catch {
      // ID not found in store, skip
    }
  }

  return removed;
}
|
|
70
|
+
|
|
71
|
+
/**
 * Fallback removal used when chunk ids are not available: runs a full-text
 * search on the `filePath` property (up to 10000 hits) and removes every hit
 * whose stored `filePath` is exactly equal to the argument.
 *
 * Only removals that `remove` reports as successful are counted.
 *
 * NOTE(review): chunks beyond the 10000-hit window would be left behind;
 * confirm whether indexes of that size are expected.
 *
 * @returns Number of documents actually removed.
 */
export async function removeByFile(filePath: string): Promise<number> {
  const store = getStore();
  const results = await search(store, {
    mode: 'fulltext',
    term: filePath,
    properties: ['filePath'],
    limit: 10000,
  } as any);

  let removed = 0;
  for (const hit of results.hits) {
    // The search is token based, so re-check for an exact path match.
    if ((hit.document as any).filePath !== filePath) continue;
    if (await remove(store, hit.id)) {
      removed++;
    }
  }
  return removed;
}
|
|
90
|
+
|
|
91
|
+
/**
 * Runs a hybrid (full-text + vector) search against the Orama store.
 *
 * @param query Text term for the full-text part.
 * @param queryEmbedding Vector compared against the `embedding` property.
 * @param topK Maximum number of hits (passed as `limit`).
 * @param category When truthy, restricts results with a `where` filter on `category`.
 * @returns Orama's result object.
 */
export async function searchHybrid(
  query: string,
  queryEmbedding: number[],
  topK: number,
  category?: string
): Promise<Results<any>> {
  const store = getStore();
  const params: any = {
    mode: 'hybrid',
    term: query,
    vector: {
      value: queryEmbedding,
      property: 'embedding',
    },
    similarity: config.similarityThreshold,
    limit: topK,
    hybridWeights: {
      text: config.hybridWeightText,
      vector: config.hybridWeightVector,
    },
  };

  if (category) {
    // `category` is declared as a `string` property. Orama documents `{ eq }`
    // for number/enum properties; string properties are filtered by passing
    // the value directly.
    // NOTE(review): string filters match on tokens, so multi-word categories
    // may match more broadly than an exact comparison — confirm.
    params.where = { category };
  }

  return search(store, params);
}
|
|
119
|
+
|
|
120
|
+
/**
 * Runs a full-text search against the Orama store.
 *
 * @param query Search term.
 * @param topK Maximum number of hits (passed as `limit`).
 * @param category When truthy, restricts results with a `where` filter on `category`.
 * @returns Orama's result object.
 */
export async function searchKeyword(query: string, topK: number, category?: string): Promise<Results<any>> {
  const store = getStore();
  const params: any = {
    mode: 'fulltext',
    term: query,
    limit: topK,
  };

  if (category) {
    // `category` is a `string` property: Orama documents `{ eq }` for
    // number/enum properties, while string properties take the value directly.
    // NOTE(review): string filters match on tokens — confirm this is precise
    // enough for multi-word categories.
    params.where = { category };
  }

  return search(store, params);
}
|
|
134
|
+
|
|
135
|
+
/**
 * Runs a vector-only search against the `embedding` property.
 *
 * @param queryEmbedding Query vector.
 * @param topK Maximum number of hits (passed as `limit`).
 * @param category When truthy, restricts results with a `where` filter on `category`.
 * @returns Orama's result object.
 */
export async function searchVector(queryEmbedding: number[], topK: number, category?: string): Promise<Results<any>> {
  const store = getStore();
  const params: any = {
    mode: 'vector',
    vector: {
      value: queryEmbedding,
      property: 'embedding',
    },
    similarity: config.similarityThreshold,
    limit: topK,
  };

  if (category) {
    // `category` is a `string` property: Orama documents `{ eq }` for
    // number/enum properties, while string properties take the value directly.
    // NOTE(review): string filters match on tokens — confirm this is precise
    // enough for multi-word categories.
    params.where = { category };
  }

  return search(store, params);
}
|
|
153
|
+
|
|
154
|
+
/**
 * Reports the `count` returned by an empty-term full-text search with
 * `limit: 0`, i.e. asks Orama for the match count without fetching any hits.
 */
export async function getDocumentCount(): Promise<number> {
  const countQuery = {
    mode: 'fulltext',
    term: '',
    limit: 0,
  } as any;
  const { count } = await search(getStore(), countQuery);
  return count;
}
|
|
163
|
+
|
|
164
|
+
/** Serialises the current store with Orama's `save` and returns the resulting data. */
export async function saveIndex(): Promise<any> {
  const store = getStore();
  return save(store);
}
|
|
167
|
+
|
|
168
|
+
/**
 * Builds a new Orama instance from SCHEMA, loads `data` into it and, once
 * loading succeeds, installs it as the module-level store. If `load` throws,
 * the previously installed store is left in place.
 */
export async function loadIndex(data: any): Promise<void> {
  const restored = (await create({ schema: SCHEMA })) as OramaDB;
  await load(restored, data);
  db = restored;
}
|
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import path from 'path';
|
|
2
|
+
import { rename } from 'fs/promises';
|
|
3
|
+
import { config } from '../config.ts';
|
|
4
|
+
import { saveIndex, loadIndex, initStore, getStore } from './orama-store.ts';
|
|
5
|
+
import { search } from '@orama/orama';
|
|
6
|
+
import { clearAll as clearMetadata } from './metadata-db.ts';
|
|
7
|
+
|
|
8
|
+
/** Location of the serialised Orama index inside `config.dataPath`. */
const ORAMA_PATH = path.join(config.dataPath, 'orama.bin');

/** Handle of the pending debounce timer set by debouncedPersist(), if any. */
let persistTimer: ReturnType<typeof setTimeout> | null = null;

/**
 * mtime of the index file as recorded by this process's last save or load;
 * reloadIfChanged() compares the on-disk mtime against it.
 */
let lastKnownMtimeMs = 0;
|
|
12
|
+
|
|
13
|
+
/**
 * Serialises the current index and writes it to ORAMA_PATH.
 *
 * The data is first written to a temp file and then renamed over the target so
 * readers never observe a partially written file. The temp file name includes
 * this process's pid: with a single shared `.tmp` name, two instances saving at
 * the same time could interleave writes into the same temp file before one of
 * them renames it into place.
 *
 * Failures are logged and swallowed; this function does not throw.
 */
export async function persistToDisk(): Promise<void> {
  try {
    const data = await saveIndex();

    // Per-process temp name keeps concurrent writers from sharing a temp file.
    const tempPath = `${ORAMA_PATH}.${process.pid}.tmp`;
    await Bun.write(tempPath, JSON.stringify(data));
    await rename(tempPath, ORAMA_PATH);

    // Track mtime so we know this write is ours
    const stat = await Bun.file(ORAMA_PATH).stat();
    if (stat) lastKnownMtimeMs = stat.mtimeMs;

    console.error('[persistence] Index saved to disk');
  } catch (err) {
    console.error('[persistence] Failed to save index:', err);
  }
}
|
|
28
|
+
|
|
29
|
+
/**
 * Schedules persistToDisk() to run after `config.persistDebounceMs`,
 * cancelling any timer scheduled by an earlier call.
 */
export function debouncedPersist(): void {
  if (persistTimer) {
    clearTimeout(persistTimer);
  }

  const runPersist = (): void => {
    persistToDisk().catch((err) => console.error('[persistence] Debounced persist failed:', err));
  };

  persistTimer = setTimeout(runPersist, config.persistDebounceMs);
}
|
|
35
|
+
|
|
36
|
+
/**
 * Loads the serialised index from ORAMA_PATH into the store.
 *
 * After loading, a vector search with a zero vector of `config.embeddingDims`
 * entries is issued as a probe. If that probe fails with a message containing
 * "dimensional", the loaded data is discarded: the store is re-created empty
 * and the metadata table is cleared. Any other probe error is ignored.
 *
 * @returns `true` when the index was loaded and kept; `false` when the file
 *   does not exist, the stored dimensions mismatch, or loading failed (the
 *   failure is logged, not thrown).
 */
export async function loadFromDisk(): Promise<boolean> {
  try {
    const indexFile = Bun.file(ORAMA_PATH);
    if (!(await indexFile.exists())) {
      return false;
    }

    const snapshot = JSON.parse(await indexFile.text());
    await loadIndex(snapshot);

    // Validate that the stored index dimensions match the current config
    const zeroVector = new Float32Array(config.embeddingDims);
    try {
      const probeQuery = {
        mode: 'vector',
        vector: { value: Array.from(zeroVector), property: 'embedding' },
        limit: 1,
      } as any;
      await search(getStore(), probeQuery);
    } catch (probeErr) {
      const reason = probeErr instanceof Error ? probeErr.message : '';
      const dimensionMismatch = reason.includes('dimensional');
      if (dimensionMismatch) {
        console.error(`[persistence] Dimension mismatch in stored index, discarding stale data`);
        await initStore();
        clearMetadata();
        return false;
      }
      // Other errors are fine (e.g. empty index)
    }

    const info = await indexFile.stat();
    if (info) {
      lastKnownMtimeMs = info.mtimeMs;
    }
    console.error('[persistence] Index loaded from disk');
    return true;
  } catch (loadErr) {
    console.error('[persistence] Failed to load index from disk, starting fresh:', loadErr);
    return false;
  }
}
|
|
72
|
+
|
|
73
|
+
/**
 * Reloads the index from ORAMA_PATH when the file's mtime is newer than the
 * one recorded by this process's last save or load.
 *
 * @returns `true` when a reload happened; `false` when the file is missing,
 *   unchanged, or the reload failed (the failure is logged, not thrown).
 */
export async function reloadIfChanged(): Promise<boolean> {
  try {
    const indexFile = Bun.file(ORAMA_PATH);
    const present = await indexFile.exists();
    if (!present) {
      return false;
    }

    const info = await indexFile.stat();
    const isNewer = Boolean(info) && info.mtimeMs > lastKnownMtimeMs;
    if (!isNewer) {
      return false;
    }

    console.error('[persistence] Index file changed on disk, reloading...');
    const snapshot = JSON.parse(await indexFile.text());
    await loadIndex(snapshot);
    lastKnownMtimeMs = info.mtimeMs;
    console.error('[persistence] Index reloaded from disk');
    return true;
  } catch (reloadErr) {
    console.error('[persistence] Failed to reload index:', reloadErr);
    return false;
  }
}
|
|
92
|
+
|
|
93
|
+
/** Cancels any pending debounced save and persists the index immediately. */
export async function flushPersist(): Promise<void> {
  const pending = persistTimer;
  if (pending) {
    clearTimeout(pending);
    persistTimer = null;
  }
  await persistToDisk();
}
|
package/src/tools/get.ts
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
import path from 'path';
|
|
2
|
+
import { config } from '../config.ts';
|
|
3
|
+
|
|
4
|
+
/** Arguments of the "get" tool. */
interface GetArgs {
  /** Path of the document, relative to `config.docsPath`. */
  path: string;
}

/**
 * Returns the content of a document under `config.docsPath`, prefixed with a
 * `# <path>` heading line.
 *
 * The requested path is joined onto the docs root and then checked to still
 * lie inside it, so inputs such as `../../etc/passwd` are rejected instead of
 * being read. NOTE(review): the check is lexical; symlinks inside the docs
 * directory that point elsewhere are not resolved — confirm whether that matters.
 *
 * @returns The formatted document, or a string starting with `Error` when the
 *   path escapes the docs directory, the file does not exist, or reading fails.
 */
export async function handleGet(args: GetArgs): Promise<string> {
  const docsRoot = path.resolve(config.docsPath);
  const filePath = path.join(docsRoot, args.path);

  // A relative path that climbs out of the root starts with a `..` segment.
  const relative = path.relative(docsRoot, filePath);
  const escapesRoot =
    relative === '..' || relative.startsWith(`..${path.sep}`) || path.isAbsolute(relative);
  if (escapesRoot) {
    return `Error: Path is outside the docs directory: ${args.path}`;
  }

  try {
    const file = Bun.file(filePath);
    if (!(await file.exists())) {
      return `Error: File not found: ${args.path}`;
    }
    const content = await file.text();
    return `# ${args.path}\n\n${content}`;
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    return `Error reading file: ${msg}`;
  }
}
|
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
import { listFiles } from '../store/metadata-db.ts';
|
|
2
|
+
import { getState } from '../indexer/state.ts';
|
|
3
|
+
|
|
4
|
+
/** Arguments of the "list" tool. */
interface ListArgs {
  /** Optional category to restrict the listing to. */
  category?: string;
}

/**
 * Renders the indexed documents as a text list, optionally restricted to one
 * category.
 *
 * When nothing is indexed, the message depends on the indexer state: a
 * progress note while indexing is running, otherwise a "no documents" note.
 * Files stored without a category are labelled `root`, matching the label
 * used by the status tool, instead of printing `null`.
 */
export function handleList(args: ListArgs): string {
  const files = listFiles(args.category);

  if (files.length === 0) {
    const indexState = getState();
    if (indexState.status === 'indexing') {
      return `Sextant is still performing initial indexing (${indexState.filesProcessed}/${indexState.filesFound} files). Document list will be available shortly. Use sextant_status to check progress.`;
    }
    return args.category
      ? `No documents found in category "${args.category}".`
      : 'No documents indexed yet.';
  }

  const header = `Indexed documents${args.category ? ` (category: ${args.category})` : ''}: ${files.length} files`;
  const entries = files.map((f) => {
    const category = f.category ?? 'root';
    const titleSuffix = f.title ? ` — ${f.title}` : '';
    return `- ${f.filePath} [${category}] (${f.chunkCount} chunks)${titleSuffix}`;
  });

  return [header, '', ...entries].join('\n');
}
|
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
import { config } from '../config.ts';
|
|
2
|
+
import { fullReindex } from '../indexer/pipeline.ts';
|
|
3
|
+
import { getState } from '../indexer/state.ts';
|
|
4
|
+
|
|
5
|
+
/** Arguments of the "reindex" tool. */
interface ReindexArgs {
  /** Passed through to fullReindex(); treated as `true` unless explicitly `false`. */
  clear_existing?: boolean;
}

/**
 * Starts a full reindex of `config.docsPath` in the background and returns
 * immediately. If the indexer state already reports `indexing`, nothing is
 * started and a progress message is returned instead.
 *
 * Completion and failure of the background run are only logged.
 */
export async function handleReindex(args: ReindexArgs): Promise<string> {
  const current = getState();
  const alreadyRunning = current.status === 'indexing';
  if (alreadyRunning) {
    return `Indexing is already in progress (${current.filesProcessed}/${current.filesFound} files). Use sextant_status to monitor progress.`;
  }

  // default true
  const clearExisting = !(args.clear_existing === false);

  console.error(`[reindex] Starting full reindex (clear_existing: ${clearExisting})...`);

  // Fire and forget -- return immediately
  const onDone = (stats: { filesProcessed: unknown; chunksCreated: unknown; duration: unknown }): void => {
    console.error(`[reindex] Complete: ${stats.filesProcessed} files, ${stats.chunksCreated} chunks in ${stats.duration}ms`);
  };
  const onFail = (err: unknown): void => {
    console.error('[reindex] Failed:', err);
  };
  fullReindex(config.docsPath, clearExisting).then(onDone).catch(onFail);

  return `Reindex started. Use sextant_status to monitor progress.`;
}
|
|
@@ -0,0 +1,93 @@
|
|
|
1
|
+
import { config } from '../config.ts';
|
|
2
|
+
import { embedQuery, checkOllamaHealth } from '../indexer/embedder.ts';
|
|
3
|
+
import { getState } from '../indexer/state.ts';
|
|
4
|
+
import { checkAndReindex } from '../indexer/freshness.ts';
|
|
5
|
+
import { searchHybrid, searchKeyword, searchVector } from '../store/orama-store.ts';
|
|
6
|
+
import { listFiles } from '../store/metadata-db.ts';
|
|
7
|
+
|
|
8
|
+
/** Arguments of the "search" tool. */
interface SearchArgs {
  /** Search text. */
  query: string;
  /** Requested number of results; clamped to the range 1..30. */
  top_k?: number;
  /** Optional category filter. */
  category?: string;
  /** Search strategy; defaults to `hybrid`. */
  search_mode?: 'hybrid' | 'semantic' | 'keyword';
}

/**
 * Runs a search and renders the hits as text.
 *
 * - Triggers the freshness check first and prefixes the output with a note when
 *   a background reindex was started, indexing is in progress, or the last
 *   indexing run failed.
 * - `keyword` mode uses full-text search only. `semantic` and `hybrid` need an
 *   embedding; when the embedding service is unhealthy, `semantic` returns an
 *   error message and `hybrid` falls back to keyword search.
 * - `top_k` is truncated to an integer and clamped to 1..30; a non-finite value
 *   falls back to `config.defaultTopK`. Previously only the upper bound was
 *   enforced, so zero, negative or NaN limits reached the store.
 *
 * @returns The rendered results, a "no results" message, or an error message.
 *   Errors raised while searching are caught and returned as text.
 */
export async function handleSearch(args: SearchArgs): Promise<string> {
  const { query, category, search_mode = 'hybrid' } = args;

  const maxTopK = 30;
  const requestedTopK = args.top_k ?? config.defaultTopK;
  const topK = Number.isFinite(requestedTopK)
    ? Math.min(Math.max(Math.trunc(requestedTopK), 1), maxTopK)
    : Math.min(config.defaultTopK, maxTopK);

  // Check for stale docs and trigger background reindex if needed
  const freshness = await checkAndReindex();

  const indexState = getState();
  let statusPrefix = '';

  if (freshness.stale && !freshness.alreadyIndexing) {
    statusPrefix = `Note: ${freshness.staleCount} file(s) changed since last index. Background reindex started. Results may not reflect latest changes.\n\n`;
  }

  if (!statusPrefix) {
    if (indexState.status === 'indexing') {
      const docCount = listFiles().length;
      if (docCount === 0) {
        return `Sextant is still performing initial indexing (${indexState.filesProcessed}/${indexState.filesFound} files). Search will be available shortly. Use sextant_status to check progress.`;
      }
      statusPrefix = `Note: Indexing is in progress (${indexState.filesProcessed}/${indexState.filesFound} files). Results may be incomplete.\n\n`;
    } else if (indexState.status === 'error') {
      statusPrefix = `Warning: Last indexing failed: ${indexState.lastError}\n\n`;
    }
  }

  try {
    let results;

    if (search_mode === 'keyword') {
      results = await searchKeyword(query, topK, category);
    } else {
      // Hybrid or semantic both need embeddings
      const ollamaOk = await checkOllamaHealth();
      if (!ollamaOk) {
        if (search_mode === 'semantic') {
          return 'Error: Embedding service unavailable — start Ollama with `ollama serve` and pull `qwen3-embedding:0.6b`. Semantic search requires embeddings.';
        }
        // Fall back to keyword for hybrid
        console.error('[search] Ollama unavailable, falling back to keyword search');
        results = await searchKeyword(query, topK, category);
      } else {
        const queryEmbedding = await embedQuery(query);

        if (search_mode === 'semantic') {
          results = await searchVector(queryEmbedding, topK, category);
        } else {
          results = await searchHybrid(query, queryEmbedding, topK, category);
        }
      }
    }

    if (results.hits.length === 0) {
      return statusPrefix + `No results found for "${query}"${category ? ` in category "${category}"` : ''}.`;
    }

    const lines: string[] = [
      `Found ${results.hits.length} results for "${query}" (mode: ${search_mode}):`,
      '',
    ];

    results.hits.forEach((hit: any, i: number) => {
      const doc = hit.document as any;
      lines.push(`--- Result ${i + 1} (score: ${hit.score.toFixed(4)}) ---`);
      lines.push(`File: ${doc.filePath}`);
      lines.push(`Section: ${doc.headingSlug}`);
      lines.push(`Category: ${doc.category}`);
      lines.push('');
      lines.push(doc.content);
      lines.push('');
    });

    return statusPrefix + lines.join('\n');
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    return `Search error: ${msg}`;
  }
}
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
import { config } from '../config.ts';
|
|
2
|
+
import { getState } from '../indexer/state.ts';
|
|
3
|
+
import { checkOllamaHealth } from '../indexer/embedder.ts';
|
|
4
|
+
import { listFiles } from '../store/metadata-db.ts';
|
|
5
|
+
|
|
6
|
+
/**
 * Builds a plain-text status report: indexer state, embedding-service health,
 * per-category file counts and the configured docs path.
 *
 * Categories are listed in order of their name. The previous default `.sort()`
 * on `[category, count]` tuples compared the stringified `"category,count"`
 * pairs, so names that are prefixes of other names could appear out of order.
 * Files without a category are grouped under `root`.
 */
export async function handleStatus(): Promise<string> {
  const state = getState();
  const ollamaOk = await checkOllamaHealth();
  const files = listFiles();

  const lines: string[] = ['Sextant Status'];

  // Indexing status
  if (state.status === 'indexing') {
    lines.push(` Indexing: in progress (${state.filesProcessed}/${state.filesFound} files${state.currentFile ? `, current: ${state.currentFile}` : ''})`);
  } else if (state.status === 'ready') {
    const duration = state.startedAt && state.completedAt
      ? ((state.completedAt - state.startedAt) / 1000).toFixed(1)
      : '?';
    lines.push(` Indexing: ready (${state.filesProcessed} files, ${state.chunksCreated} chunks, took ${duration}s)`);
  } else if (state.status === 'error') {
    lines.push(` Indexing: error: ${state.lastError}`);
  } else {
    lines.push(` Indexing: idle`);
  }

  // Ollama health
  if (ollamaOk) {
    lines.push(` Ollama: healthy (${config.embeddingModel} at ${config.ollamaUrl})`);
  } else {
    lines.push(` Ollama: unreachable at ${config.ollamaUrl}`);
  }

  // Index stats grouped by category
  if (files.length > 0) {
    const byCategory = new Map<string, number>();
    for (const f of files) {
      const cat = f.category ?? 'root';
      byCategory.set(cat, (byCategory.get(cat) ?? 0) + 1);
    }
    lines.push(` Index: ${files.length} files across ${byCategory.size} categories`);

    // Sort on the category name only (keys are unique, so no tie-break is needed).
    const ordered = [...byCategory.entries()].sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0));
    for (const [cat, count] of ordered) {
      lines.push(` ${cat}: ${count} files`);
    }
  } else {
    lines.push(` Index: empty`);
  }

  // Config
  lines.push(` Docs path: ${config.docsPath}`);

  return lines.join('\n');
}
|