@tideshift/sextant 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +25 -0
- package/LICENSE +21 -0
- package/README.md +123 -0
- package/package.json +53 -0
- package/src/config.ts +50 -0
- package/src/ignore-files.ts +31 -0
- package/src/index.ts +69 -0
- package/src/indexer/chunker.ts +245 -0
- package/src/indexer/embedder.ts +79 -0
- package/src/indexer/freshness.ts +74 -0
- package/src/indexer/pipeline.ts +161 -0
- package/src/indexer/state.ts +70 -0
- package/src/indexer/types.ts +34 -0
- package/src/scripts/diag.ts +154 -0
- package/src/server.ts +98 -0
- package/src/store/metadata-db.ts +109 -0
- package/src/store/orama-store.ts +172 -0
- package/src/store/persistence.ts +99 -0
- package/src/tools/get.ts +22 -0
- package/src/tools/list.ts +31 -0
- package/src/tools/reindex.ts +29 -0
- package/src/tools/search.ts +93 -0
- package/src/tools/status.ts +53 -0
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
import { config } from '../config.ts';
|
|
2
|
+
|
|
3
|
+
// Maximum number of attempts for a single embedding request.
const MAX_RETRIES = 3;
// Base delay for exponential backoff between retries (500ms, 1000ms, 2000ms...).
const INITIAL_BACKOFF_MS = 500;

// Shape of the JSON body returned by Ollama's /api/embed endpoint:
// one embedding vector per input string, in request order.
interface OllamaEmbedResponse {
  embeddings: number[][];
}
|
|
9
|
+
|
|
10
|
+
async function callOllamaEmbed(inputs: string[]): Promise<number[][]> {
|
|
11
|
+
let lastError: Error | null = null;
|
|
12
|
+
|
|
13
|
+
for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
|
|
14
|
+
try {
|
|
15
|
+
const response = await fetch(`${config.ollamaUrl}/api/embed`, {
|
|
16
|
+
method: 'POST',
|
|
17
|
+
headers: { 'Content-Type': 'application/json' },
|
|
18
|
+
body: JSON.stringify({
|
|
19
|
+
model: config.embeddingModel,
|
|
20
|
+
input: inputs,
|
|
21
|
+
}),
|
|
22
|
+
});
|
|
23
|
+
|
|
24
|
+
if (!response.ok) {
|
|
25
|
+
throw new Error(`Ollama returned ${response.status}: ${await response.text()}`);
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
const data = (await response.json()) as OllamaEmbedResponse;
|
|
29
|
+
return data.embeddings;
|
|
30
|
+
} catch (err) {
|
|
31
|
+
lastError = err instanceof Error ? err : new Error(String(err));
|
|
32
|
+
if (attempt < MAX_RETRIES - 1) {
|
|
33
|
+
const delay = INITIAL_BACKOFF_MS * Math.pow(2, attempt);
|
|
34
|
+
await Bun.sleep(delay);
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
throw lastError;
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
export async function checkOllamaHealth(): Promise<boolean> {
|
|
43
|
+
try {
|
|
44
|
+
const response = await fetch(`${config.ollamaUrl}/api/tags`);
|
|
45
|
+
return response.ok;
|
|
46
|
+
} catch {
|
|
47
|
+
return false;
|
|
48
|
+
}
|
|
49
|
+
}
|
|
50
|
+
|
|
51
|
+
export async function embedTexts(texts: string[], isQuery = false): Promise<number[][]> {
|
|
52
|
+
const instruction = isQuery ? config.queryInstruction : config.indexInstruction;
|
|
53
|
+
const prefixedTexts = texts.map((t) => instruction + t);
|
|
54
|
+
|
|
55
|
+
const allEmbeddings: number[][] = [];
|
|
56
|
+
|
|
57
|
+
for (let i = 0; i < prefixedTexts.length; i += config.embeddingBatchSize) {
|
|
58
|
+
const batch = prefixedTexts.slice(i, i + config.embeddingBatchSize);
|
|
59
|
+
try {
|
|
60
|
+
const embeddings = await callOllamaEmbed(batch);
|
|
61
|
+
allEmbeddings.push(...embeddings);
|
|
62
|
+
} catch (err) {
|
|
63
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
64
|
+
if (msg.includes('fetch') || msg.includes('ECONNREFUSED') || msg.includes('Failed')) {
|
|
65
|
+
throw new Error(
|
|
66
|
+
`Ollama is not reachable at ${config.ollamaUrl}. Ensure Ollama is running with '${config.embeddingModel}' model pulled (run: ollama pull ${config.embeddingModel}).`
|
|
67
|
+
);
|
|
68
|
+
}
|
|
69
|
+
throw err;
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
return allEmbeddings;
|
|
74
|
+
}
|
|
75
|
+
|
|
76
|
+
export async function embedQuery(query: string): Promise<number[]> {
|
|
77
|
+
const results = await embedTexts([query], true);
|
|
78
|
+
return results[0]!;
|
|
79
|
+
}
|
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
import path from 'path';
|
|
2
|
+
import { Glob } from 'bun';
|
|
3
|
+
import { config } from '../config.ts';
|
|
4
|
+
import { getFile, getAllFiles } from '../store/metadata-db.ts';
|
|
5
|
+
import { getState } from './state.ts';
|
|
6
|
+
import { indexAll } from './pipeline.ts';
|
|
7
|
+
import { reloadIfChanged } from '../store/persistence.ts';
|
|
8
|
+
|
|
9
|
+
// Result of a freshness check of the on-disk docs folder against the index.
export interface FreshnessResult {
  // True when at least one indexed file is new, modified, or deleted.
  stale: boolean;
  // Number of stale files detected (only set when stale is true).
  staleCount?: number;
  // True when an indexing pass was already running, so no new one was started.
  alreadyIndexing?: boolean;
}
|
|
14
|
+
|
|
15
|
+
export async function checkAndReindex(): Promise<FreshnessResult> {
|
|
16
|
+
// Pick up any index persisted by another process first
|
|
17
|
+
await reloadIfChanged();
|
|
18
|
+
|
|
19
|
+
const docsPath = config.docsPath;
|
|
20
|
+
|
|
21
|
+
// Scan disk for current markdown files
|
|
22
|
+
const glob = new Glob('**/*.md');
|
|
23
|
+
const diskFiles = new Set<string>();
|
|
24
|
+
for await (const match of glob.scan({ cwd: docsPath, absolute: false })) {
|
|
25
|
+
diskFiles.add(match);
|
|
26
|
+
}
|
|
27
|
+
|
|
28
|
+
// Check for deleted files
|
|
29
|
+
const indexedFiles = getAllFiles();
|
|
30
|
+
const indexedSet = new Set(indexedFiles.map((f) => f.filePath));
|
|
31
|
+
let staleCount = 0;
|
|
32
|
+
|
|
33
|
+
for (const indexed of indexedFiles) {
|
|
34
|
+
if (!diskFiles.has(indexed.filePath)) {
|
|
35
|
+
staleCount++;
|
|
36
|
+
}
|
|
37
|
+
}
|
|
38
|
+
|
|
39
|
+
// Check for new or modified files
|
|
40
|
+
for (const relPath of diskFiles) {
|
|
41
|
+
const absPath = path.join(docsPath, relPath);
|
|
42
|
+
try {
|
|
43
|
+
const stat = await Bun.file(absPath).stat();
|
|
44
|
+
if (!stat) continue;
|
|
45
|
+
|
|
46
|
+
const existing = getFile(relPath);
|
|
47
|
+
if (!existing) {
|
|
48
|
+
// New file
|
|
49
|
+
staleCount++;
|
|
50
|
+
} else if (stat.mtimeMs > existing.lastModified) {
|
|
51
|
+
// Modified file
|
|
52
|
+
staleCount++;
|
|
53
|
+
}
|
|
54
|
+
} catch {
|
|
55
|
+
// File disappeared between scan and stat, count as stale
|
|
56
|
+
if (indexedSet.has(relPath)) staleCount++;
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
if (staleCount === 0) {
|
|
61
|
+
return { stale: false };
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
if (getState().status === 'indexing') {
|
|
65
|
+
return { stale: true, staleCount, alreadyIndexing: true };
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
// Fire-and-forget background reindex
|
|
69
|
+
indexAll(docsPath).catch((err) => {
|
|
70
|
+
console.error('[freshness] Background reindex failed:', err);
|
|
71
|
+
});
|
|
72
|
+
|
|
73
|
+
return { stale: true, staleCount };
|
|
74
|
+
}
|
|
@@ -0,0 +1,161 @@
|
|
|
1
|
+
import path from 'path';
|
|
2
|
+
import { Glob } from 'bun';
|
|
3
|
+
import { config } from '../config.ts';
|
|
4
|
+
import { chunkMarkdown } from './chunker.ts';
|
|
5
|
+
import { embedTexts, checkOllamaHealth } from './embedder.ts';
|
|
6
|
+
import { insertChunks, removeByIds, removeByFile, initStore } from '../store/orama-store.ts';
|
|
7
|
+
import { upsertFile, removeFile as removeFileMeta, getFile, getChunkIds, getAllFiles, clearAll as clearMetadata } from '../store/metadata-db.ts';
|
|
8
|
+
import { persistToDisk } from '../store/persistence.ts';
|
|
9
|
+
import { setIndexing, updateProgress, setReady, setError, isCancelRequested, getState } from './state.ts';
|
|
10
|
+
import type { IndexStats, DocChunk } from './types.ts';
|
|
11
|
+
|
|
12
|
+
/**
 * Incrementally (re)index every markdown file under `docsPath`.
 *
 * Pass overview:
 *   1. Scan disk, delete index entries for files that no longer exist.
 *   2. Chunk files that are new or modified (mtime newer than recorded).
 *   3. Embed all new chunks in one batch, insert into the Orama store,
 *      update the metadata DB, and persist everything to disk.
 *
 * Progress/state transitions are published through the indexer state module
 * (setIndexing/updateProgress/setReady/setError); cancellation is polled
 * between files.
 *
 * @param docsPath absolute path of the docs root to index
 * @returns stats for this pass (files processed, chunks created, duration ms)
 * @throws rethrows embedding/insertion failures after recording them via setError
 */
export async function indexAll(docsPath: string): Promise<IndexStats> {
  const start = Date.now();
  let filesProcessed = 0;
  let chunksCreated = 0;

  // Collect all markdown files
  const glob = new Glob('**/*.md');
  const filePaths: string[] = [];
  for await (const match of glob.scan({ cwd: docsPath, absolute: false })) {
    filePaths.push(match);
  }

  // Remove files from the index that no longer exist on disk
  const diskFileSet = new Set(filePaths);
  const indexedFiles = getAllFiles();
  let deletionsPerformed = false;
  for (const indexed of indexedFiles) {
    if (!diskFileSet.has(indexed.filePath)) {
      console.error(`[pipeline] Removing deleted file from index: ${indexed.filePath}`);
      // Prefer removal by recorded chunk ids; fall back to a by-file sweep
      // when the metadata DB has no id list for this file.
      const knownIds = getChunkIds(indexed.filePath);
      if (knownIds) {
        await removeByIds(knownIds);
      } else {
        await removeByFile(indexed.filePath);
      }
      removeFileMeta(indexed.filePath);
      deletionsPerformed = true;
    }
  }

  setIndexing(filePaths.length);

  if (filePaths.length === 0) {
    console.error('[pipeline] No markdown files found in', docsPath);
    setReady({ filesProcessed: 0, chunksCreated: 0 });
    return { filesProcessed: 0, chunksCreated: 0, duration: Date.now() - start };
  }

  console.error(`[pipeline] Found ${filePaths.length} markdown files`);

  // Check Ollama availability (warn only — chunking can still proceed)
  const ollamaOk = await checkOllamaHealth();
  if (!ollamaOk) {
    console.error(`[pipeline] WARNING: Ollama not reachable at ${config.ollamaUrl}. Indexing will fail for embeddings.`);
    console.error(`[pipeline] Run: ollama pull ${config.embeddingModel}`);
  }

  // Chunk all files
  const allChunks: DocChunk[] = [];
  const fileMetadata: { filePath: string; lastModified: number; chunkCount: number; chunkIds: string[]; title: string | null; category: string }[] = [];

  for (const relPath of filePaths) {
    // Cancellation checkpoint between files.
    if (isCancelRequested()) {
      console.error('[pipeline] Indexing cancelled');
      setError('Indexing cancelled');
      return { filesProcessed, chunksCreated, duration: Date.now() - start };
    }

    updateProgress(filesProcessed, chunksCreated, relPath);
    const absPath = path.join(docsPath, relPath);
    try {
      const file = Bun.file(absPath);
      const stat = await file.stat();
      if (!stat) continue;
      const lastModified = stat.mtimeMs;

      // Check if file needs re-indexing
      const existing = getFile(relPath);
      if (existing && existing.lastModified >= lastModified) {
        console.error(`[pipeline] Skipping unchanged: ${relPath}`);
        continue;
      }

      const content = await file.text();
      const { chunks, metadata } = chunkMarkdown(absPath, content, lastModified, docsPath);

      if (chunks.length > 0) {
        // Remove old chunks before inserting to avoid duplicate ID collisions
        const knownIds = getChunkIds(relPath);
        if (knownIds) {
          await removeByIds(knownIds);
        } else {
          await removeByFile(relPath);
        }
        allChunks.push(...chunks);
        fileMetadata.push({
          filePath: relPath,
          lastModified,
          chunkCount: chunks.length,
          chunkIds: chunks.map((c) => c.id),
          // Prefer frontmatter title, then first heading, then null.
          title: metadata.title ?? chunks[0]?.headingHierarchy[0] ?? null,
          category: chunks[0]?.category ?? 'root',
        });
      }

      filesProcessed++;
    } catch (err) {
      // Per-file errors are logged and skipped so one bad file cannot abort the pass.
      console.error(`[pipeline] Error processing ${relPath}:`, err);
    }
  }

  if (allChunks.length === 0) {
    console.error('[pipeline] No new chunks to index');
    // Still persist if we deleted anything above, so removals survive restarts.
    if (deletionsPerformed) await persistToDisk();
    setReady({ filesProcessed, chunksCreated: 0 });
    return { filesProcessed, chunksCreated: 0, duration: Date.now() - start };
  }

  // Embed all chunks in batches
  console.error(`[pipeline] Embedding ${allChunks.length} chunks...`);
  try {
    const texts = allChunks.map((c) => c.content);
    const embeddings = await embedTexts(texts);

    // Insert into Orama
    await insertChunks(allChunks, embeddings);
    chunksCreated = allChunks.length;

    // Update metadata DB
    for (const fm of fileMetadata) {
      upsertFile(fm.filePath, fm.lastModified, fm.chunkCount, fm.title, fm.category, fm.chunkIds);
    }

    // Persist
    await persistToDisk();

    console.error(`[pipeline] Indexed ${filesProcessed} files, ${chunksCreated} chunks in ${Date.now() - start}ms`);
    setReady({ filesProcessed, chunksCreated });
  } catch (err) {
    const msg = err instanceof Error ? err.message : String(err);
    setError(msg);
    console.error('[pipeline] Embedding/indexing failed:', err);
    throw err;
  }

  return { filesProcessed, chunksCreated, duration: Date.now() - start };
}
|
|
149
|
+
|
|
150
|
+
export async function fullReindex(docsPath: string, clearExisting: boolean): Promise<IndexStats> {
|
|
151
|
+
if (getState().status === 'indexing') {
|
|
152
|
+
return { filesProcessed: 0, chunksCreated: 0, duration: 0 };
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
if (clearExisting) {
|
|
156
|
+
console.error('[pipeline] Clearing existing index...');
|
|
157
|
+
await initStore();
|
|
158
|
+
clearMetadata();
|
|
159
|
+
}
|
|
160
|
+
return indexAll(docsPath);
|
|
161
|
+
}
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
export type IndexingStatus = 'idle' | 'indexing' | 'ready' | 'error';
|
|
2
|
+
|
|
3
|
+
export interface IndexingState {
|
|
4
|
+
status: IndexingStatus;
|
|
5
|
+
filesFound: number;
|
|
6
|
+
filesProcessed: number;
|
|
7
|
+
chunksCreated: number;
|
|
8
|
+
currentFile: string | null;
|
|
9
|
+
startedAt: number | null;
|
|
10
|
+
completedAt: number | null;
|
|
11
|
+
lastError: string | null;
|
|
12
|
+
cancelRequested: boolean;
|
|
13
|
+
}
|
|
14
|
+
|
|
15
|
+
const state: IndexingState = {
|
|
16
|
+
status: 'idle',
|
|
17
|
+
filesFound: 0,
|
|
18
|
+
filesProcessed: 0,
|
|
19
|
+
chunksCreated: 0,
|
|
20
|
+
currentFile: null,
|
|
21
|
+
startedAt: null,
|
|
22
|
+
completedAt: null,
|
|
23
|
+
lastError: null,
|
|
24
|
+
cancelRequested: false,
|
|
25
|
+
};
|
|
26
|
+
|
|
27
|
+
export function getState(): Readonly<IndexingState> {
|
|
28
|
+
return { ...state };
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
export function setIndexing(filesFound: number): void {
|
|
32
|
+
state.status = 'indexing';
|
|
33
|
+
state.filesFound = filesFound;
|
|
34
|
+
state.filesProcessed = 0;
|
|
35
|
+
state.chunksCreated = 0;
|
|
36
|
+
state.currentFile = null;
|
|
37
|
+
state.startedAt = Date.now();
|
|
38
|
+
state.completedAt = null;
|
|
39
|
+
state.lastError = null;
|
|
40
|
+
state.cancelRequested = false;
|
|
41
|
+
}
|
|
42
|
+
|
|
43
|
+
export function updateProgress(filesProcessed: number, chunksCreated: number, currentFile: string): void {
|
|
44
|
+
state.filesProcessed = filesProcessed;
|
|
45
|
+
state.chunksCreated = chunksCreated;
|
|
46
|
+
state.currentFile = currentFile;
|
|
47
|
+
}
|
|
48
|
+
|
|
49
|
+
export function setReady(stats: { filesProcessed: number; chunksCreated: number }): void {
|
|
50
|
+
state.status = 'ready';
|
|
51
|
+
state.filesProcessed = stats.filesProcessed;
|
|
52
|
+
state.chunksCreated = stats.chunksCreated;
|
|
53
|
+
state.currentFile = null;
|
|
54
|
+
state.completedAt = Date.now();
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
export function setError(msg: string): void {
|
|
58
|
+
state.status = 'error';
|
|
59
|
+
state.lastError = msg;
|
|
60
|
+
state.currentFile = null;
|
|
61
|
+
state.completedAt = Date.now();
|
|
62
|
+
}
|
|
63
|
+
|
|
64
|
+
export function requestCancel(): void {
|
|
65
|
+
state.cancelRequested = true;
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
export function isCancelRequested(): boolean {
|
|
69
|
+
return state.cancelRequested;
|
|
70
|
+
}
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
// One searchable chunk of a markdown document, as inserted into the store.
export interface DocChunk {
  // Unique, stable identifier for the chunk (used for targeted removal).
  id: string;
  // Source file path, relative to the docs root.
  filePath: string;
  // NOTE(review): presumably the file's base name — set by the chunker; confirm there.
  fileName: string;
  // Doc category; the pipeline falls back to 'root' when absent.
  category: string;
  // Heading chain from the document root down to this chunk's section.
  headingHierarchy: string[];
  // Slug of the chunk's section heading (printed in search/diag output).
  headingSlug: string;
  // Position of this chunk within the file's chunk sequence.
  chunkIndex: number;
  // Raw chunk text that gets embedded and searched.
  content: string;
  // NOTE(review): presumably content.length — confirm against the chunker.
  charCount: number;
  // mtime (ms) of the source file at chunking time.
  lastModified: number;
}

// Per-file metadata extracted by the chunker (all fields optional).
export interface ChunkMetadata {
  title?: string;
  tags?: string[];
  category?: string;
}

// Summary of a single indexing pass.
export interface IndexStats {
  filesProcessed: number;
  chunksCreated: number;
  // Wall-clock duration of the pass, in milliseconds.
  duration: number;
}

// A file's record in the metadata DB.
export interface IndexedFile {
  filePath: string;
  // mtime (ms) recorded at index time; compared against disk for staleness.
  lastModified: number;
  chunkCount: number;
  title: string | null;
  category: string | null;
  // NOTE(review): presumably the timestamp when the file was indexed — confirm in metadata-db.
  indexedAt: number;
  // Chunk id list, serialized as a single string in the DB (null when unknown).
  chunkIds: string | null;
}
|
|
@@ -0,0 +1,154 @@
|
|
|
1
|
+
import { config } from '../config.ts';
|
|
2
|
+
import { initMetadataDb } from '../store/metadata-db.ts';
|
|
3
|
+
import { initStore, getStore, insertChunks } from '../store/orama-store.ts';
|
|
4
|
+
import { search } from '@orama/orama';
|
|
5
|
+
import { embedTexts, embedQuery, checkOllamaHealth } from '../indexer/embedder.ts';
|
|
6
|
+
import { chunkMarkdown } from '../indexer/chunker.ts';
|
|
7
|
+
import path from 'path';
|
|
8
|
+
import { mkdirSync, writeFileSync, unlinkSync } from 'fs';
|
|
9
|
+
|
|
10
|
+
// Fixture document written into the docs folder so the pipeline has known,
// deterministic content to chunk, embed, and search against during diagnostics.
const TEST_DOC = `# Grip System

The grip system allows characters to pick up and hold objects realistically.
It uses inverse kinematics to adjust hand positions based on the object's shape.

## How It Works

When a character approaches an object, the system calculates grip points
based on the object's collision mesh. The IK solver then positions the
character's hands to match these grip points naturally.

## Configuration

You can configure grip strength, release threshold, and IK blend speed
in the grip component settings.
`;
|
|
26
|
+
|
|
27
|
+
async function diag() {
|
|
28
|
+
console.log('=== Sextant Embedding Diagnostic ===\n');
|
|
29
|
+
|
|
30
|
+
// Step 1: Check Ollama
|
|
31
|
+
console.log('1. Checking Ollama health...');
|
|
32
|
+
const healthy = await checkOllamaHealth();
|
|
33
|
+
console.log(` Ollama: ${healthy ? 'healthy' : 'UNREACHABLE'}`);
|
|
34
|
+
console.log(` Model: ${config.embeddingModel}`);
|
|
35
|
+
console.log(` Dims: ${config.embeddingDims}`);
|
|
36
|
+
if (!healthy) {
|
|
37
|
+
console.log(' STOPPING: Ollama must be running');
|
|
38
|
+
return;
|
|
39
|
+
}
|
|
40
|
+
|
|
41
|
+
// Step 2: Test raw embedding
|
|
42
|
+
console.log('\n2. Testing raw embedding call...');
|
|
43
|
+
try {
|
|
44
|
+
const embeddings = await embedTexts(['Hello world test']);
|
|
45
|
+
console.log(` Got ${embeddings.length} embedding(s)`);
|
|
46
|
+
console.log(` Dimensions: ${embeddings[0]?.length}`);
|
|
47
|
+
console.log(` First 5 values: [${embeddings[0]?.slice(0, 5).map(v => v.toFixed(6)).join(', ')}]`);
|
|
48
|
+
const allZero = embeddings[0]?.every(v => v === 0);
|
|
49
|
+
console.log(` All zeros: ${allZero}`);
|
|
50
|
+
if (embeddings[0]?.length !== config.embeddingDims) {
|
|
51
|
+
console.log(` WARNING: Model returns ${embeddings[0]?.length} dims but config expects ${config.embeddingDims}`);
|
|
52
|
+
}
|
|
53
|
+
} catch (err) {
|
|
54
|
+
console.log(` ERROR: ${err}`);
|
|
55
|
+
return;
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
// Step 3: Create fresh store and index test doc
|
|
59
|
+
console.log('\n3. Creating fresh store and indexing test doc...');
|
|
60
|
+
mkdirSync(config.dataPath, { recursive: true });
|
|
61
|
+
initMetadataDb();
|
|
62
|
+
await initStore();
|
|
63
|
+
|
|
64
|
+
const testDocPath = path.join(config.docsPath, '_diag_test.md');
|
|
65
|
+
writeFileSync(testDocPath, TEST_DOC);
|
|
66
|
+
|
|
67
|
+
const { chunks } = chunkMarkdown(testDocPath, TEST_DOC, Date.now(), config.docsPath);
|
|
68
|
+
console.log(` Chunks created: ${chunks.length}`);
|
|
69
|
+
for (const c of chunks) {
|
|
70
|
+
console.log(` - "${c.headingSlug}" (${c.content.length} chars)`);
|
|
71
|
+
}
|
|
72
|
+
|
|
73
|
+
// Step 4: Embed chunks
|
|
74
|
+
console.log('\n4. Embedding chunks...');
|
|
75
|
+
const texts = chunks.map(c => c.content);
|
|
76
|
+
const embeddings = await embedTexts(texts);
|
|
77
|
+
console.log(` Got ${embeddings.length} embeddings`);
|
|
78
|
+
for (let i = 0; i < embeddings.length; i++) {
|
|
79
|
+
const emb = embeddings[i]!;
|
|
80
|
+
console.log(` Chunk ${i}: ${emb.length} dims, first 3: [${emb.slice(0, 3).map(v => v.toFixed(6)).join(', ')}], allZero: ${emb.every(v => v === 0)}`);
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
// Step 5: Insert into store
|
|
84
|
+
console.log('\n5. Inserting into Orama...');
|
|
85
|
+
await insertChunks(chunks, embeddings);
|
|
86
|
+
console.log(' Insert successful');
|
|
87
|
+
|
|
88
|
+
// Step 6: Verify with keyword search
|
|
89
|
+
console.log('\n6. Keyword search for "grip"...');
|
|
90
|
+
const kwResults = await search(getStore(), { mode: 'fulltext', term: 'grip', limit: 3 } as any);
|
|
91
|
+
console.log(` Hits: ${kwResults.hits.length}`);
|
|
92
|
+
for (const hit of kwResults.hits) {
|
|
93
|
+
const doc = hit.document as any;
|
|
94
|
+
console.log(` - score=${hit.score.toFixed(4)}, section="${doc.headingSlug}"`);
|
|
95
|
+
const emb = doc.embedding;
|
|
96
|
+
if (Array.isArray(emb)) {
|
|
97
|
+
console.log(` stored embedding: ${emb.length} dims, first 3: [${emb.slice(0, 3).map((v: number) => v.toFixed(6)).join(', ')}]`);
|
|
98
|
+
} else {
|
|
99
|
+
console.log(` stored embedding: ${typeof emb} (NOT an array!)`);
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
// Step 7: Vector search
|
|
104
|
+
console.log('\n7. Vector search for "grip system"...');
|
|
105
|
+
try {
|
|
106
|
+
const qEmb = await embedQuery('grip system');
|
|
107
|
+
console.log(` Query embedding: ${qEmb.length} dims`);
|
|
108
|
+
|
|
109
|
+
const vecResults = await search(getStore(), {
|
|
110
|
+
mode: 'vector',
|
|
111
|
+
vector: { value: qEmb, property: 'embedding' },
|
|
112
|
+
similarity: config.similarityThreshold,
|
|
113
|
+
limit: 3,
|
|
114
|
+
} as any);
|
|
115
|
+
console.log(` Hits: ${vecResults.hits.length}`);
|
|
116
|
+
for (const hit of vecResults.hits) {
|
|
117
|
+
const doc = hit.document as any;
|
|
118
|
+
console.log(` - score=${hit.score.toFixed(4)}, section="${doc.headingSlug}"`);
|
|
119
|
+
}
|
|
120
|
+
} catch (err) {
|
|
121
|
+
console.log(` ERROR: ${err}`);
|
|
122
|
+
}
|
|
123
|
+
|
|
124
|
+
// Step 8: Hybrid search
|
|
125
|
+
console.log('\n8. Hybrid search for "picking up objects"...');
|
|
126
|
+
try {
|
|
127
|
+
const qEmb = await embedQuery('picking up objects');
|
|
128
|
+
const hybridResults = await search(getStore(), {
|
|
129
|
+
mode: 'hybrid',
|
|
130
|
+
term: 'picking up objects',
|
|
131
|
+
vector: { value: qEmb, property: 'embedding' },
|
|
132
|
+
similarity: config.similarityThreshold,
|
|
133
|
+
limit: 3,
|
|
134
|
+
hybridWeights: { text: 0.5, vector: 0.5 },
|
|
135
|
+
} as any);
|
|
136
|
+
console.log(` Hits: ${hybridResults.hits.length}`);
|
|
137
|
+
for (const hit of hybridResults.hits) {
|
|
138
|
+
const doc = hit.document as any;
|
|
139
|
+
console.log(` - score=${hit.score.toFixed(4)}, section="${doc.headingSlug}"`);
|
|
140
|
+
}
|
|
141
|
+
} catch (err) {
|
|
142
|
+
console.log(` ERROR: ${err}`);
|
|
143
|
+
}
|
|
144
|
+
|
|
145
|
+
// Cleanup
|
|
146
|
+
try {
|
|
147
|
+
const { unlinkSync } = require('fs');
|
|
148
|
+
unlinkSync(testDocPath);
|
|
149
|
+
} catch {}
|
|
150
|
+
|
|
151
|
+
console.log('\n=== Done ===');
|
|
152
|
+
}
|
|
153
|
+
|
|
154
|
+
diag().catch(console.error);
|
package/src/server.ts
ADDED
|
@@ -0,0 +1,98 @@
|
|
|
1
|
+
import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
|
|
2
|
+
import { z } from 'zod';
|
|
3
|
+
import { handleSearch } from './tools/search.ts';
|
|
4
|
+
import { handleList } from './tools/list.ts';
|
|
5
|
+
import { handleGet } from './tools/get.ts';
|
|
6
|
+
import { handleReindex } from './tools/reindex.ts';
|
|
7
|
+
import { handleStatus } from './tools/status.ts';
|
|
8
|
+
|
|
9
|
+
/**
 * Build the Sextant MCP server and register its five tools:
 * search_docs, list_docs, get_doc, reindex_docs, and sextant_status.
 *
 * Each tool delegates to its handler in ./tools and wraps the handler's
 * string result in the MCP text-content envelope.
 *
 * @returns the configured (not yet connected) McpServer instance
 */
export function createMcpServer(): McpServer {
  const server = new McpServer(
    { name: 'sextant', version: '0.1.0' },
    {
      instructions: 'Sextant provides hybrid semantic and keyword search over project documentation. Use its tools to find information about architecture, decisions, guides, issues, worklogs, plans, and any project knowledge stored in markdown files.',
    },
  );

  // search_docs tool
  server.tool(
    'search_docs',
    'Search project documentation using hybrid semantic + keyword search. Returns the most relevant chunks from the docs folder. Use this to find information about architecture, decisions, guides, issues, worklogs, plans, and any project knowledge.',
    {
      query: z.string().describe(
        "Natural language search query OR exact keyword/identifier (e.g., 'how does replication work' or 'ISM-247')"
      ),
      top_k: z.number().optional().describe('Number of results to return (default: 10, max: 30)'),
      category: z.string().optional().describe(
        "Optional: filter by doc category/folder (e.g., 'architecture', 'worklogs', 'issues')"
      ),
      search_mode: z
        .enum(['hybrid', 'semantic', 'keyword'])
        .optional()
        .describe(
          "Search mode: 'hybrid' (default, best recall), 'semantic' (conceptual similarity only), 'keyword' (exact/token match only)"
        ),
    },
    async (args) => {
      const result = await handleSearch(args);
      return { content: [{ type: 'text', text: result }] };
    }
  );

  // list_docs tool
  server.tool(
    'list_docs',
    'List all indexed documents, optionally filtered by category. Returns file paths, categories, and document titles.',
    {
      category: z.string().optional().describe('Optional: filter by category/folder name'),
    },
    async (args) => {
      // handleList is synchronous; the async wrapper just satisfies the tool signature.
      const result = handleList(args);
      return { content: [{ type: 'text', text: result }] };
    }
  );

  // get_doc tool
  server.tool(
    'get_doc',
    'Retrieve the full content of a specific document by its file path. Use after search_docs to read a complete document.',
    {
      path: z.string().describe(
        "Relative file path within the docs folder (e.g., 'architecture/networking.md')"
      ),
    },
    async (args) => {
      const result = await handleGet(args);
      return { content: [{ type: 'text', text: result }] };
    }
  );

  // reindex_docs tool
  server.tool(
    'reindex_docs',
    'Force a full re-index of all documents. Use if search results seem stale or docs have been updated.',
    {
      clear_existing: z
        .boolean()
        .optional()
        .describe('If true, wipe all existing index data before re-indexing (default: true)'),
    },
    async (args) => {
      const result = await handleReindex(args);
      return { content: [{ type: 'text', text: result }] };
    }
  );

  // sextant_status tool
  server.tool(
    'sextant_status',
    'Check Sextant health: indexing progress, Ollama connectivity, and index stats.',
    {},
    async () => {
      const result = await handleStatus();
      return { content: [{ type: 'text', text: result }] };
    }
  );

  return server;
}
|