@aeriondyseti/vector-memory-mcp 1.1.0-dev.2 → 1.1.0-dev.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -11
- package/dist/package.json +1 -2
- package/dist/src/config/index.d.ts +17 -10
- package/dist/src/config/index.d.ts.map +1 -1
- package/dist/src/config/index.js +25 -11
- package/dist/src/config/index.js.map +1 -1
- package/dist/src/db/conversation.repository.d.ts +26 -0
- package/dist/src/db/conversation.repository.d.ts.map +1 -0
- package/dist/src/db/conversation.repository.js +72 -0
- package/dist/src/db/conversation.repository.js.map +1 -0
- package/dist/src/db/conversation.schema.d.ts +4 -0
- package/dist/src/db/conversation.schema.d.ts.map +1 -0
- package/dist/src/db/conversation.schema.js +15 -0
- package/dist/src/db/conversation.schema.js.map +1 -0
- package/dist/src/db/lancedb-utils.d.ts +13 -3
- package/dist/src/db/lancedb-utils.d.ts.map +1 -1
- package/dist/src/db/lancedb-utils.js +36 -7
- package/dist/src/db/lancedb-utils.js.map +1 -1
- package/dist/src/db/memory.repository.d.ts +1 -0
- package/dist/src/db/memory.repository.d.ts.map +1 -1
- package/dist/src/db/memory.repository.js +18 -7
- package/dist/src/db/memory.repository.js.map +1 -1
- package/dist/src/http/server.d.ts.map +1 -1
- package/dist/src/http/server.js +38 -24
- package/dist/src/http/server.js.map +1 -1
- package/dist/src/index.js +7 -6
- package/dist/src/index.js.map +1 -1
- package/dist/src/mcp/handlers.d.ts +3 -3
- package/dist/src/mcp/handlers.d.ts.map +1 -1
- package/dist/src/mcp/handlers.js +128 -145
- package/dist/src/mcp/handlers.js.map +1 -1
- package/dist/src/mcp/tools.d.ts +2 -2
- package/dist/src/mcp/tools.d.ts.map +1 -1
- package/dist/src/mcp/tools.js +53 -24
- package/dist/src/mcp/tools.js.map +1 -1
- package/dist/src/services/conversation.service.d.ts +38 -0
- package/dist/src/services/conversation.service.d.ts.map +1 -0
- package/dist/src/services/conversation.service.js +252 -0
- package/dist/src/services/conversation.service.js.map +1 -0
- package/dist/src/services/memory.service.d.ts +10 -27
- package/dist/src/services/memory.service.d.ts.map +1 -1
- package/dist/src/services/memory.service.js +94 -94
- package/dist/src/services/memory.service.js.map +1 -1
- package/dist/src/services/parsers/claude-code.parser.d.ts +8 -0
- package/dist/src/services/parsers/claude-code.parser.d.ts.map +1 -0
- package/dist/src/services/parsers/claude-code.parser.js +191 -0
- package/dist/src/services/parsers/claude-code.parser.js.map +1 -0
- package/dist/src/services/parsers/types.d.ts +9 -0
- package/dist/src/services/parsers/types.d.ts.map +1 -0
- package/dist/src/services/parsers/types.js +2 -0
- package/dist/src/services/parsers/types.js.map +1 -0
- package/dist/src/types/conversation.d.ts +99 -0
- package/dist/src/types/conversation.d.ts.map +1 -0
- package/dist/src/types/conversation.js +2 -0
- package/dist/src/types/conversation.js.map +1 -0
- package/package.json +1 -2
- package/src/config/index.ts +39 -21
- package/src/db/conversation.repository.ts +120 -0
- package/src/db/conversation.schema.ts +33 -0
- package/src/db/lancedb-utils.ts +35 -7
- package/src/db/memory.repository.ts +18 -7
- package/src/http/server.ts +43 -25
- package/src/index.ts +10 -11
- package/src/mcp/handlers.ts +144 -151
- package/src/mcp/tools.ts +54 -25
- package/src/services/conversation.service.ts +354 -0
- package/src/services/memory.service.ts +136 -119
- package/src/services/parsers/claude-code.parser.ts +242 -0
- package/src/services/parsers/types.ts +14 -0
- package/src/types/conversation.ts +108 -0
- package/dist/src/db/conversation-history.repository.d.ts +0 -24
- package/dist/src/db/conversation-history.repository.d.ts.map +0 -1
- package/dist/src/db/conversation-history.repository.js +0 -184
- package/dist/src/db/conversation-history.repository.js.map +0 -1
- package/dist/src/db/conversation-history.schema.d.ts +0 -10
- package/dist/src/db/conversation-history.schema.d.ts.map +0 -1
- package/dist/src/db/conversation-history.schema.js +0 -31
- package/dist/src/db/conversation-history.schema.js.map +0 -1
- package/dist/src/services/conversation-history.service.d.ts +0 -64
- package/dist/src/services/conversation-history.service.d.ts.map +0 -1
- package/dist/src/services/conversation-history.service.js +0 -244
- package/dist/src/services/conversation-history.service.js.map +0 -1
- package/dist/src/services/session-parser.d.ts +0 -59
- package/dist/src/services/session-parser.d.ts.map +0 -1
- package/dist/src/services/session-parser.js +0 -147
- package/dist/src/services/session-parser.js.map +0 -1
- package/dist/src/types/conversation-history.d.ts +0 -74
- package/dist/src/types/conversation-history.d.ts.map +0 -1
- package/dist/src/types/conversation-history.js +0 -2
- package/dist/src/types/conversation-history.js.map +0 -1
- package/hooks/session-start.ts +0 -100
- package/src/db/conversation-history.repository.ts +0 -255
- package/src/db/conversation-history.schema.ts +0 -40
- package/src/services/conversation-history.service.ts +0 -320
- package/src/services/session-parser.ts +0 -232
- package/src/types/conversation-history.ts +0 -82
package/hooks/session-start.ts
DELETED
|
@@ -1,100 +0,0 @@
|
|
|
1
|
-
#!/usr/bin/env bun
|
|
2
|
-
/**
|
|
3
|
-
* SessionStart hook for Claude Code
|
|
4
|
-
*
|
|
5
|
-
* Fetches config from the running vector-memory server's /health endpoint,
|
|
6
|
-
* then retrieves and outputs the latest checkpoint.
|
|
7
|
-
*
|
|
8
|
-
* Requires the server to be running with HTTP enabled.
|
|
9
|
-
*
|
|
10
|
-
* Usage in ~/.claude/settings.json:
|
|
11
|
-
* {
|
|
12
|
-
* "hooks": {
|
|
13
|
-
* "SessionStart": [{
|
|
14
|
-
* "hooks": [{
|
|
15
|
-
* "type": "command",
|
|
16
|
-
* "command": "bun /path/to/vector-memory-mcp/hooks/session-start.ts"
|
|
17
|
-
* }]
|
|
18
|
-
* }]
|
|
19
|
-
* }
|
|
20
|
-
* }
|
|
21
|
-
*/
|
|
22
|
-
|
|
23
|
-
import { existsSync } from "fs";
|
|
24
|
-
import { connectToDatabase } from "../src/db/connection.js";
|
|
25
|
-
import { MemoryRepository } from "../src/db/memory.repository.js";
|
|
26
|
-
import { EmbeddingsService } from "../src/services/embeddings.service.js";
|
|
27
|
-
import { MemoryService } from "../src/services/memory.service.js";
|
|
28
|
-
|
|
29
|
-
// Base URL of the vector-memory HTTP server; taken from the VECTOR_MEMORY_URL
// environment variable, falling back to the local default when it is unset.
const VECTOR_MEMORY_URL = process.env.VECTOR_MEMORY_URL ?? "http://127.0.0.1:3271";
|
|
30
|
-
|
|
31
|
-
/**
 * Server settings carried by the `/health` payload; this hook uses them to
 * open the database and construct the embeddings service.
 */
interface HealthConfig {
  /** Filesystem path of the database (checked for existence before connecting). */
  dbPath: string;
  /** Embedding model identifier handed to `EmbeddingsService`. */
  embeddingModel: string;
  /** Embedding dimension handed to `EmbeddingsService`. */
  embeddingDimension: number;
}

/** JSON body this hook expects from the server's `/health` endpoint. */
interface HealthResponse {
  /** Status string reported by the server; not inspected by this hook. */
  status: string;
  config: HealthConfig;
}
|
|
39
|
-
|
|
40
|
-
/**
 * Reports whether `error`, or any error reachable through its `cause` chain,
 * describes a refused connection.
 *
 * Checking only `error.message` is not enough: Node's fetch rejects with
 * `TypeError: fetch failed` and puts the `ECONNREFUSED` code on `error.cause`.
 * NOTE(review): Bun is believed to report `code: "ConnectionRefused"` — confirm
 * against the Bun version this hook runs under.
 */
function isConnectionRefused(error: unknown): boolean {
  const visited = new Set<unknown>(); // guards against a cyclic `cause` chain
  let current: unknown = error;

  while (typeof current === "object" && current !== null && !visited.has(current)) {
    visited.add(current);
    const { code, message, cause } = current as {
      code?: unknown;
      message?: unknown;
      cause?: unknown;
    };

    if (code === "ECONNREFUSED" || code === "ConnectionRefused") return true;
    if (typeof message === "string" && message.includes("ECONNREFUSED")) return true;

    current = cause;
  }

  return false;
}

/**
 * Entry point of the SessionStart hook.
 *
 * Reads the server configuration from `${VECTOR_MEMORY_URL}/health`, opens the
 * database it points at, and prints the latest checkpoint (followed by any
 * memories listed in the checkpoint's `memory_ids` metadata) to stdout.
 *
 * Prints a "Starting fresh session." notice and returns normally when the
 * server is unreachable, the database path does not exist, or no checkpoint is
 * stored. Any other failure is rethrown to the caller.
 */
async function main(): Promise<void> {
  // Get config from running server
  let health: HealthResponse;
  try {
    const response = await fetch(`${VECTOR_MEMORY_URL}/health`);
    if (!response.ok) {
      throw new Error(`Server returned ${response.status}`);
    }
    health = (await response.json()) as HealthResponse;
  } catch (error) {
    if (isConnectionRefused(error)) {
      console.log("Vector memory server not running. Starting fresh session.");
      return;
    }
    throw error;
  }

  const { dbPath, embeddingModel, embeddingDimension } = health.config;

  // Check if DB exists
  if (!existsSync(dbPath)) {
    console.log("Vector memory database not found. Starting fresh session.");
    return;
  }

  const db = await connectToDatabase(dbPath);
  const repository = new MemoryRepository(db);
  const embeddings = new EmbeddingsService(embeddingModel, embeddingDimension);
  const service = new MemoryService(repository, embeddings);

  const checkpoint = await service.getLatestCheckpoint();

  if (!checkpoint) {
    console.log("No checkpoint found. Starting fresh session.");
    return;
  }

  // Fetch referenced memories if any. The metadata value is untyped, so only
  // an actual array is treated as a list of ids.
  const rawMemoryIds: unknown = checkpoint.metadata.memory_ids;
  const memoryIds = Array.isArray(rawMemoryIds) ? (rawMemoryIds as string[]) : [];

  // Lookups are independent, so run them together; Promise.all keeps the
  // results in the same order as memoryIds.
  const lookups = await Promise.all(
    memoryIds.map(async (id) => ({ id, memory: await service.get(id) })),
  );

  // Ids that no longer resolve to a memory are skipped.
  const memories = lookups.flatMap(({ id, memory }) =>
    memory ? [`### Memory: ${id}\n${memory.content}`] : [],
  );

  const memoriesSection =
    memories.length > 0 ? `\n\n## Referenced Memories\n\n${memories.join("\n\n")}` : "";

  console.log(checkpoint.content + memoriesSection);
}
|
|
96
|
-
|
|
97
|
-
// Run the hook; on any unhandled failure report it on stderr and exit non-zero.
main().catch((err: unknown) => {
  // A rejection is not guaranteed to be an Error, so fall back to its string form.
  const reason = err instanceof Error ? err.message : String(err);
  console.error("Error loading checkpoint:", reason);
  process.exit(1);
});
|
|
@@ -1,255 +0,0 @@
|
|
|
1
|
-
import * as lancedb from "@lancedb/lancedb";
|
|
2
|
-
import { type Table } from "@lancedb/lancedb";
|
|
3
|
-
import {
|
|
4
|
-
CONVERSATION_HISTORY_TABLE,
|
|
5
|
-
INDEXED_SESSIONS_TABLE,
|
|
6
|
-
conversationHistorySchema,
|
|
7
|
-
indexedSessionsSchema,
|
|
8
|
-
} from "./conversation-history.schema.js";
|
|
9
|
-
import {
|
|
10
|
-
arrowVectorToArray,
|
|
11
|
-
getOrCreateTable,
|
|
12
|
-
createFtsMutex,
|
|
13
|
-
createRerankerMutex,
|
|
14
|
-
escapeLanceDbString,
|
|
15
|
-
} from "./lancedb-utils.js";
|
|
16
|
-
import type {
|
|
17
|
-
ConversationHistoryEntry,
|
|
18
|
-
ConversationHistoryHybridRow,
|
|
19
|
-
IndexedSession,
|
|
20
|
-
IndexedSessionSummary,
|
|
21
|
-
MessageRole,
|
|
22
|
-
} from "../types/conversation-history.js";
|
|
23
|
-
|
|
24
|
-
/**
 * LanceDB-backed storage for indexed conversation messages and for the
 * per-session tracking records that describe what has been indexed.
 */
export class ConversationHistoryRepository {
  // Table handles are opened on first use and reused afterwards; a failed open
  // clears the cached promise so a later call can try again.
  private tablePromise: Promise<Table> | null = null;
  private sessionsTablePromise: Promise<Table> | null = null;

  // Full-text-search index guard for the history table (built in the constructor).
  private ensureFtsIndex: () => Promise<void>;

  // Shared reranker accessor, so searches do not each build their own reranker.
  private getReranker = createRerankerMutex();

  constructor(private db: lancedb.Connection) {
    this.ensureFtsIndex = createFtsMutex(() => this.getTable());
  }

  /** Returns the (lazily opened) conversation-history table. */
  private getTable(): Promise<Table> {
    if (this.tablePromise) {
      return this.tablePromise;
    }

    const opening = getOrCreateTable(
      this.db,
      CONVERSATION_HISTORY_TABLE,
      conversationHistorySchema
    ).catch((err) => {
      this.tablePromise = null;
      throw err;
    });
    this.tablePromise = opening;
    return opening;
  }

  /** Returns the (lazily opened) indexed-sessions tracking table. */
  private getSessionsTable(): Promise<Table> {
    if (this.sessionsTablePromise) {
      return this.sessionsTablePromise;
    }

    const opening = getOrCreateTable(
      this.db,
      INDEXED_SESSIONS_TABLE,
      indexedSessionsSchema
    ).catch((err) => {
      this.sessionsTablePromise = null;
      throw err;
    });
    this.sessionsTablePromise = opening;
    return opening;
  }

  /** Builds the `session_id = '...'` predicate with the id escaped for LanceDB. */
  private sessionFilter(sessionId: string): string {
    return `session_id = '${escapeLanceDbString(sessionId)}'`;
  }

  /** Maps a raw history-table row onto a ConversationHistoryEntry. */
  private rowToEntry(row: Record<string, unknown>): ConversationHistoryEntry {
    const timestamp = new Date(row.timestamp as number);
    const createdAt = new Date(row.created_at as number);

    return {
      id: row.id as string,
      content: row.content as string,
      embedding: arrowVectorToArray(row.vector),
      sessionId: row.session_id as string,
      role: row.role as MessageRole,
      messageIndex: row.message_index as number,
      timestamp,
      // Metadata is stored as a JSON string (see insert()).
      metadata: JSON.parse(row.metadata as string),
      createdAt,
    };
  }

  /** Maps a raw tracking-table row onto the summary view (no file details). */
  private rowToSessionSummary(
    row: Record<string, unknown>
  ): IndexedSessionSummary {
    // Compare against null rather than testing truthiness: an empty string is
    // a stored value and must be kept distinct from "absent".
    const project = row.project != null ? { project: row.project as string } : {};
    const gitBranch =
      row.git_branch != null ? { gitBranch: row.git_branch as string } : {};

    return {
      sessionId: row.session_id as string,
      messageCount: row.message_count as number,
      firstMessageAt: new Date(row.first_message_at as number),
      lastMessageAt: new Date(row.last_message_at as number),
      indexedAt: new Date(row.indexed_at as number),
      ...project,
      ...gitBranch,
    };
  }

  /** Maps a raw tracking-table row onto the full record, including file path and size. */
  private rowToSession(row: Record<string, unknown>): IndexedSession {
    const summary = this.rowToSessionSummary(row);
    return {
      ...summary,
      filePath: row.file_path as string,
      fileSize: row.file_size as number,
    };
  }

  // --- Conversation History Operations ---

  /** Appends the given entries to the history table; does nothing for an empty list. */
  async insert(entries: ConversationHistoryEntry[]): Promise<void> {
    if (entries.length === 0) return;

    const rows = entries.map((entry) => ({
      id: entry.id,
      vector: entry.embedding,
      content: entry.content,
      session_id: entry.sessionId,
      role: entry.role,
      message_index: entry.messageIndex,
      timestamp: entry.timestamp.getTime(),
      metadata: JSON.stringify(entry.metadata),
      created_at: entry.createdAt.getTime(),
    }));

    const table = await this.getTable();
    await table.add(rows);
  }

  /**
   * Runs a combined vector + full-text query, reranked with the shared
   * reranker, and returns at most `limit` rows with their relevance score
   * exposed as `rrfScore` (0 when the row carries no score).
   */
  async findHybrid(
    embedding: number[],
    query: string,
    limit: number
  ): Promise<ConversationHistoryHybridRow[]> {
    await this.ensureFtsIndex();

    const table = await this.getTable();
    const reranker = await this.getReranker();

    const hits = await table
      .query()
      .nearestTo(embedding)
      .fullTextSearch(query)
      .rerank(reranker)
      .limit(limit)
      .toArray();

    return hits.map((hit) => ({
      ...this.rowToEntry(hit as Record<string, unknown>),
      rrfScore: (hit._relevance_score as number) ?? 0,
    }));
  }

  /** Returns every stored entry belonging to `sessionId`. */
  async findBySessionId(sessionId: string): Promise<ConversationHistoryEntry[]> {
    const table = await this.getTable();
    const rows = await table.query().where(this.sessionFilter(sessionId)).toArray();

    return rows.map((row) => this.rowToEntry(row as Record<string, unknown>));
  }

  /** Deletes all entries of `sessionId` and returns how many were found beforehand. */
  async deleteBySessionId(sessionId: string): Promise<number> {
    const table = await this.getTable();
    const filter = this.sessionFilter(sessionId);

    // Only the id column is selected, so embedding vectors are not
    // deserialized just to obtain a count.
    const matches = await table.query().where(filter).select(["id"]).toArray();

    if (matches.length > 0) {
      await table.delete(filter);
    }

    return matches.length;
  }

  // --- Indexed Sessions Tracking ---

  /** Returns the tracking record for `sessionId`, or null when none exists. */
  async getIndexedSession(
    sessionId: string
  ): Promise<IndexedSession | null> {
    const table = await this.getSessionsTable();
    const rows = await table
      .query()
      .where(this.sessionFilter(sessionId))
      .limit(1)
      .toArray();

    return rows.length === 0
      ? null
      : this.rowToSession(rows[0] as Record<string, unknown>);
  }

  /** Inserts the tracking record, or updates it in place when the session is already tracked. */
  async upsertIndexedSession(session: IndexedSession): Promise<void> {
    const table = await this.getSessionsTable();
    const filter = this.sessionFilter(session.sessionId);
    const existing = await table.query().where(filter).limit(1).toArray();

    const row = {
      session_id: session.sessionId,
      file_path: session.filePath,
      file_size: session.fileSize,
      message_count: session.messageCount,
      first_message_at: session.firstMessageAt.getTime(),
      last_message_at: session.lastMessageAt.getTime(),
      indexed_at: session.indexedAt.getTime(),
      project: session.project ?? null,
      git_branch: session.gitBranch ?? null,
    };

    if (existing.length > 0) {
      await table.update({ where: filter, values: row });
      return;
    }

    await table.add([row]);
  }

  /** Returns the summary view of every tracked session. */
  async listIndexedSessions(): Promise<IndexedSessionSummary[]> {
    const table = await this.getSessionsTable();
    const rows = await table.query().toArray();

    return rows.map((row) =>
      this.rowToSessionSummary(row as Record<string, unknown>)
    );
  }

  /** Removes the tracking record of `sessionId`; returns false when there was none. */
  async deleteIndexedSession(sessionId: string): Promise<boolean> {
    const table = await this.getSessionsTable();
    const filter = this.sessionFilter(sessionId);
    const existing = await table.query().where(filter).limit(1).toArray();

    if (existing.length === 0) {
      return false;
    }

    await table.delete(filter);
    return true;
  }
}
|
|
@@ -1,40 +0,0 @@
|
|
|
1
|
-
import {
|
|
2
|
-
Schema,
|
|
3
|
-
Field,
|
|
4
|
-
Utf8,
|
|
5
|
-
Int32,
|
|
6
|
-
Float64,
|
|
7
|
-
} from "apache-arrow";
|
|
8
|
-
import { vectorField, timestampField } from "./schema.js";
|
|
9
|
-
|
|
10
|
-
/** Name of the table holding the indexed conversation messages. */
export const CONVERSATION_HISTORY_TABLE = "conversation_history";

/**
 * Name of the table recording which sessions have been indexed, together with
 * their file sizes, so that indexing is idempotent and can proceed incrementally.
 */
export const INDEXED_SESSIONS_TABLE = "indexed_sessions";

/** Builds a UTF-8 string column; non-nullable unless `nullable` is true. */
const utf8Column = (name: string, nullable = false) =>
  new Field(name, new Utf8(), nullable);

/** Builds a non-nullable 32-bit integer column. */
const int32Column = (name: string) => new Field(name, new Int32(), false);

/** Column layout of the conversation-history table. */
export const conversationHistorySchema = new Schema([
  utf8Column("id"),
  vectorField(),
  utf8Column("content"),
  utf8Column("session_id"),
  utf8Column("role"), // "user" | "assistant"
  int32Column("message_index"),
  timestampField("timestamp"),
  utf8Column("metadata"), // JSON string
  timestampField("created_at"),
]);

/** Column layout of the indexed-sessions tracking table. */
export const indexedSessionsSchema = new Schema([
  utf8Column("session_id"),
  utf8Column("file_path"),
  new Field("file_size", new Float64(), false), // Float64 avoids Int32 overflow and BigInt handling
  int32Column("message_count"),
  timestampField("first_message_at"),
  timestampField("last_message_at"),
  timestampField("indexed_at"),
  utf8Column("project", true), // Nullable
  utf8Column("git_branch", true), // Nullable
]);
|
|
@@ -1,320 +0,0 @@
|
|
|
1
|
-
import { readdir, stat } from "fs/promises";
|
|
2
|
-
import { join } from "path";
|
|
3
|
-
import type { ConversationHistoryRepository } from "../db/conversation-history.repository.js";
|
|
4
|
-
import type { EmbeddingsService } from "./embeddings.service.js";
|
|
5
|
-
import {
|
|
6
|
-
parseSessionFile,
|
|
7
|
-
discoverSessionFiles,
|
|
8
|
-
detectSessionPath,
|
|
9
|
-
type ParsedMessage,
|
|
10
|
-
type ParseResult,
|
|
11
|
-
type SessionFileInfo,
|
|
12
|
-
} from "./session-parser.js";
|
|
13
|
-
import type {
|
|
14
|
-
ConversationHistoryEntry,
|
|
15
|
-
HistorySearchResult,
|
|
16
|
-
IndexedSession,
|
|
17
|
-
IndexedSessionSummary,
|
|
18
|
-
IndexingSummary,
|
|
19
|
-
} from "../types/conversation-history.js";
|
|
20
|
-
|
|
21
|
-
// Number of parsed messages embedded and inserted together per batch when a
// session file is indexed.
const EMBED_BATCH_SIZE = 50;
|
|
22
|
-
|
|
23
|
-
/**
 * Indexes conversation session files into the conversation-history repository
 * and serves hybrid searches over the indexed messages.
 */
export class ConversationHistoryService {
  constructor(
    private repository: ConversationHistoryRepository,
    private embeddings: EmbeddingsService,
    private sessionPath: string | null, // null = auto-detect
  ) {}

  /**
   * Index all conversation sessions found in the session directory.
   *
   * For each .jsonl file discovered:
   * - New (not tracked): full parse from byte 0
   * - Grown (fileSize increased): incremental parse from last-known size
   * - Shrunk (fileSize decreased — file replaced): delete + full reindex
   * - Unchanged (same fileSize): skip
   *
   * @param sessionDir Directory to scan; defaults to the configured session
   *   path, then to the auto-detected one.
   * @returns Counts of discovered, indexed and skipped sessions and of newly
   *   indexed messages.
   */
  async indexConversations(sessionDir?: string): Promise<IndexingSummary> {
    const allFiles = await this.discoverAllFiles(sessionDir);

    // Load every tracking record up front so the loop below does map lookups
    // instead of one repository query per discovered file.
    const trackedSessions = await this.buildSessionIndex();

    const summary: IndexingSummary = {
      sessionsDiscovered: allFiles.length,
      sessionsIndexed: 0,
      sessionsSkipped: 0,
      messagesIndexed: 0,
    };

    for (const file of allFiles) {
      const indexed = trackedSessions.get(file.sessionId) ?? null;

      if (indexed && indexed.fileSize === file.fileSize) {
        // Unchanged — skip
        summary.sessionsSkipped++;
        continue;
      }

      if (indexed && file.fileSize < indexed.fileSize) {
        // Shrunk — file was replaced, full reindex
        await this.repository.deleteBySessionId(file.sessionId);
        await this.repository.deleteIndexedSession(file.sessionId);
        const count = await this.indexFile(file, 0, 0, null);
        summary.sessionsIndexed++;
        summary.messagesIndexed += count;
        continue;
      }

      if (indexed && file.fileSize > indexed.fileSize) {
        // Grown — incremental parse from where we left off
        const count = await this.indexFile(
          file,
          indexed.fileSize,
          indexed.messageCount,
          indexed,
        );
        summary.sessionsIndexed++;
        summary.messagesIndexed += count;
        continue;
      }

      // New — full parse
      const count = await this.indexFile(file, 0, 0, null);
      summary.sessionsIndexed++;
      summary.messagesIndexed += count;
    }

    return summary;
  }

  /**
   * Search conversation history using hybrid (vector + FTS) search.
   *
   * @param query Text that is both embedded and used for the full-text part.
   * @param limit Maximum number of results requested from the repository.
   */
  async search(query: string, limit: number): Promise<HistorySearchResult[]> {
    const embedding = await this.embeddings.embed(query);
    const rows = await this.repository.findHybrid(embedding, query, limit);

    return rows.map((row) => ({
      source: "conversation_history" as const,
      id: row.id,
      content: row.content,
      metadata: row.metadata,
      score: row.rrfScore,
      sessionId: row.sessionId,
      role: row.role,
      messageIndex: row.messageIndex,
      timestamp: row.timestamp,
    }));
  }

  /**
   * List all indexed sessions (pass-through to repository).
   */
  async listIndexedSessions(): Promise<IndexedSessionSummary[]> {
    return this.repository.listIndexedSessions();
  }

  /**
   * Force a full reindex of a specific session: delete its stored entries and
   * tracking record, then re-parse its file from byte 0.
   *
   * The session is reported as skipped, and nothing is deleted, when it has no
   * tracking record (the file path is then unknown) or when its file can no
   * longer be stat'ed.
   */
  async reindexSession(sessionId: string): Promise<IndexingSummary> {
    const indexed = await this.repository.getIndexedSession(sessionId);

    const summary: IndexingSummary = {
      sessionsDiscovered: 1,
      sessionsIndexed: 0,
      sessionsSkipped: 0,
      messagesIndexed: 0,
    };

    if (!indexed) {
      // Nothing to reindex — no tracking record means we don't know the file path
      summary.sessionsSkipped = 1;
      return summary;
    }

    // Check the file BEFORE deleting anything: if it is gone there is nothing
    // to rebuild from, and wiping the index would lose data while still
    // reporting the session as merely "skipped".
    let fileSize: number;
    try {
      const stats = await stat(indexed.filePath);
      fileSize = stats.size; // current size; the file may have changed since last index
    } catch {
      // File no longer exists (or cannot be read) — leave existing data untouched
      summary.sessionsSkipped = 1;
      return summary;
    }

    // Delete existing data
    await this.repository.deleteBySessionId(sessionId);
    await this.repository.deleteIndexedSession(sessionId);

    const fileInfo: SessionFileInfo = {
      sessionId,
      filePath: indexed.filePath,
      fileSize,
    };

    const count = await this.indexFile(fileInfo, 0, 0, null);
    summary.sessionsIndexed = 1;
    summary.messagesIndexed = count;
    return summary;
  }

  // --- Private helpers ---

  /**
   * Load all tracked sessions into a Map keyed by session id.
   *
   * listIndexedSessions() only returns summaries (no filePath/fileSize), so
   * each summary is promoted to a full IndexedSession with its own
   * getIndexedSession() call. The calls run in parallel, but this is still one
   * repository query per tracked session.
   */
  private async buildSessionIndex(): Promise<Map<string, IndexedSession>> {
    const summaries = await this.repository.listIndexedSessions();
    const sessionMap = new Map<string, IndexedSession>();

    // Fetch full records in parallel for all known sessions
    const fullRecords = await Promise.all(
      summaries.map((s) => this.repository.getIndexedSession(s.sessionId)),
    );

    for (const record of fullRecords) {
      // A record may have disappeared between the list and the lookup.
      if (record) {
        sessionMap.set(record.sessionId, record);
      }
    }

    return sessionMap;
  }

  /**
   * Discover session files under the resolved base directory.
   *
   * If the base directory itself yields files, only those are returned and its
   * subdirectories are not scanned; otherwise every immediate subdirectory is
   * scanned and the results are concatenated.
   */
  private async discoverAllFiles(sessionDir?: string): Promise<SessionFileInfo[]> {
    const base = sessionDir ?? this.sessionPath ?? detectSessionPath();
    if (!base) return [];

    // Check if base dir itself has .jsonl files
    const rootFiles = await discoverSessionFiles(base);
    if (rootFiles.length > 0) return rootFiles;

    // Otherwise enumerate subdirectories and discover files in each
    const dirs = await this.listSubdirectories(base);
    const nested = await Promise.all(dirs.map((d) => discoverSessionFiles(d)));
    return nested.flat();
  }

  /**
   * List immediate subdirectories of a path. Stat calls are parallelized.
   * Returns an empty list when `base` cannot be read; entries whose stat call
   * fails are left out.
   */
  private async listSubdirectories(base: string): Promise<string[]> {
    let entries: string[];
    try {
      entries = await readdir(base);
    } catch {
      return [];
    }

    // allSettled so that one unreadable entry does not discard the others.
    const results = await Promise.allSettled(
      entries.map(async (entry) => {
        const fullPath = join(base, entry);
        const stats = await stat(fullPath);
        return stats.isDirectory() ? fullPath : null;
      }),
    );

    return results
      .filter((r): r is PromiseFulfilledResult<string | null> => r.status === "fulfilled")
      .map((r) => r.value)
      .filter((v): v is string => v != null);
  }

  /**
   * Parse a session file, embed messages in batches, insert into repository,
   * and upsert the tracking record. Returns count of messages indexed.
   *
   * @param file Session file to index.
   * @param fromByte Passed to the parser as the start offset (0 for a full parse).
   * @param startIndex Number of messages already indexed for this session.
   * @param existing Current tracking record for incremental runs, otherwise null.
   */
  private async indexFile(
    file: SessionFileInfo,
    fromByte: number,
    startIndex: number,
    existing: IndexedSession | null,
  ): Promise<number> {
    const parseResult = await parseSessionFile(
      file.filePath,
      fromByte,
      startIndex,
      file.fileSize,
    );

    if (parseResult.messages.length === 0) {
      // Still upsert tracking so we don't re-parse an empty/no-new-content file
      await this.upsertTracking(file, parseResult.messages, startIndex, parseResult, existing);
      return 0;
    }

    // Embed and insert in batches
    for (let i = 0; i < parseResult.messages.length; i += EMBED_BATCH_SIZE) {
      const batch = parseResult.messages.slice(i, i + EMBED_BATCH_SIZE);
      const texts = batch.map((m) => m.content);
      const embeddings = await this.embeddings.embedBatch(texts);

      // embeddings[idx] is paired with batch[idx] — assumes embedBatch returns
      // one vector per input text, in order (TODO confirm).
      const entries: ConversationHistoryEntry[] = batch.map((msg, idx) => ({
        id: msg.id,
        content: msg.content,
        embedding: embeddings[idx],
        sessionId: msg.sessionId,
        role: msg.role,
        messageIndex: msg.messageIndex,
        timestamp: msg.timestamp,
        metadata: msg.metadata,
        createdAt: new Date(),
      }));

      await this.repository.insert(entries);
    }

    // Tracking is written only after every batch has been inserted.
    await this.upsertTracking(file, parseResult.messages, startIndex, parseResult, existing);
    return parseResult.messages.length;
  }

  /**
   * Upsert the indexed session tracking record.
   * For incremental indexing, merges timestamps with the existing record:
   * the existing first-message time is kept, while the newly parsed
   * last-message time takes precedence.
   */
  private async upsertTracking(
    file: SessionFileInfo,
    newMessages: ParsedMessage[],
    startIndex: number,
    parseResult: ParseResult,
    existing: IndexedSession | null,
  ): Promise<void> {
    const totalMessageCount = startIndex + newMessages.length;
    const firstMessageAt =
      existing?.firstMessageAt ?? parseResult.firstMessageAt ?? new Date();
    const lastMessageAt =
      parseResult.lastMessageAt ?? existing?.lastMessageAt ?? new Date();

    // Freshly parsed project/branch win; otherwise fall back to the existing
    // record. Falsy values (including "") are treated as absent here.
    const project = parseResult.project || existing?.project;
    const gitBranch = parseResult.gitBranch || existing?.gitBranch;

    const session: IndexedSession = {
      sessionId: file.sessionId,
      filePath: file.filePath,
      fileSize: file.fileSize,
      messageCount: totalMessageCount,
      firstMessageAt,
      lastMessageAt,
      indexedAt: new Date(),
      ...(project ? { project } : {}),
      ...(gitBranch ? { gitBranch } : {}),
    };

    await this.repository.upsertIndexedSession(session);
  }
}
|